From 20509f1bc553ed7fafa88fa8d01c6212d1876d9f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Aug 2005 16:25:34 -0700
Subject: NFS: Drop inode after rename

 When doing a rename on top of an existing file that is not in use,
 the inode of the overwritten file will remain in the icache.

 The fix is to decrement i_nlink of the overwritten inode, like we
 do for unlink, rmdir etc already.

 Problem diagnosed by Olaf Kirch. This patch is a slight variation
 on his fix.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2df639f143e8..94a7fcee0624 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1539,7 +1539,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 #endif
 			goto out;
 		}
-	}
+	} else
+		new_inode->i_nlink--;
 
 go_ahead:
 	/*
-- 
cgit v1.2.3-58-ga151


From 449231d6ddf50ca46b7fb2f76ecf790135222913 Mon Sep 17 00:00:00 2001
From: Olaf Kirch <okir@suse.de>
Date: Thu, 25 Aug 2005 16:25:35 -0700
Subject: From: Olaf Kirch <okir@suse.de> [PATCH] Fix miscompare in
 __posix_lock_file

 If an application requests the same lock twice, the
 kernel should just leave the existing lock in place.
 Currently, it will install a second lock of the same type.

 Signed-off-by: Olaf Kirch <okir@suse.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/locks.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index f7daa5f48949..7eb1d77b9204 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -829,12 +829,16 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
 		/* Detect adjacent or overlapping regions (if same lock type)
 		 */
 		if (request->fl_type == fl->fl_type) {
+			/* In all comparisons of start vs end, use
+			 * "start - 1" rather than "end + 1". If end
+			 * is OFFSET_MAX, end + 1 will become negative.
+			 */
 			if (fl->fl_end < request->fl_start - 1)
 				goto next_lock;
 			/* If the next lock in the list has entirely bigger
 			 * addresses than the new one, insert the lock here.
 			 */
-			if (fl->fl_start > request->fl_end + 1)
+			if (fl->fl_start - 1 > request->fl_end)
 				break;
 
 			/* If we come here, the new and old lock are of the
-- 
cgit v1.2.3-58-ga151


From 9aa48b7e270d13c8781414dce081a42cae20a80d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Aug 2005 16:25:35 -0700
Subject: NFS: Don't expose internal READDIR errors to userspace

 Fixes a condition whereby the kernel is returning the non-POSIX error
 EBADCOOKIE to userspace.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 94a7fcee0624..c70eabd6d179 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -565,8 +565,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		}
 	}
 	unlock_kernel();
-	if (desc->error < 0)
-		return desc->error;
 	if (res < 0)
 		return res;
 	return 0;
-- 
cgit v1.2.3-58-ga151


From eab5c084b858fd95a873fc2b97de9a9ad937b4ed Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@citi.umich.edu>
Date: Thu, 11 Aug 2005 16:25:14 -0400
Subject: [PATCH] NFS: use a constant value for TCP retransmit timeouts

 Implement a best practice: don't use exponential backoff when computing
 retransmit timeout values on TCP connections, but simply retransmit
 at regular intervals.

 This also fixes a bug introduced when xprt_reset_majortimeo() was added.

 Test-plan:
 Enable RPC debugging and watch timeout behavior on a NFS/TCP mount.

 Version: Thu, 11 Aug 2005 16:02:19 -0400

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    | 73 ++++++++++++++++++++++++++-----------------------------
 net/sunrpc/xprt.c |  4 +--
 2 files changed, 37 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6922469d6fc5..b6a1ca508e60 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -358,6 +358,35 @@ out_no_root:
 	return no_root_error;
 }
 
+static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned int timeo, unsigned int retrans)
+{
+	to->to_initval = timeo * HZ / 10;
+	to->to_retries = retrans;
+	if (!to->to_retries)
+		to->to_retries = 2;
+
+	switch (proto) {
+	case IPPROTO_TCP:
+		if (!to->to_initval)
+			to->to_initval = 60 * HZ;
+		if (to->to_initval > RPC_MAX_TCP_TIMEOUT)
+			to->to_initval = RPC_MAX_TCP_TIMEOUT;
+		to->to_increment = to->to_initval;
+		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		to->to_exponential = 0;
+		break;
+	case IPPROTO_UDP:
+	default:
+		if (!to->to_initval)
+			to->to_initval = 11 * HZ / 10;
+		if (to->to_initval > RPC_MAX_UDP_TIMEOUT)
+			to->to_initval = RPC_MAX_UDP_TIMEOUT;
+		to->to_maxval = RPC_MAX_UDP_TIMEOUT;
+		to->to_exponential = 1;
+		break;
+	}
+}
+
 /*
  * Create an RPC client handle.
  */
@@ -367,22 +396,12 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 	struct rpc_timeout	timeparms;
 	struct rpc_xprt		*xprt = NULL;
 	struct rpc_clnt		*clnt = NULL;
-	int			tcp   = (data->flags & NFS_MOUNT_TCP);
-
-	/* Initialize timeout values */
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_maxval  = tcp ? RPC_MAX_TCP_TIMEOUT : RPC_MAX_UDP_TIMEOUT;
-	timeparms.to_exponential = 1;
+	int			proto = (data->flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
 
-	if (!timeparms.to_initval)
-		timeparms.to_initval = (tcp ? 600 : 11) * HZ / 10;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
+	nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
 	/* create transport and client */
-	xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP,
-				 &server->addr, &timeparms);
+	xprt = xprt_create_proto(proto, &server->addr, &timeparms);
 	if (IS_ERR(xprt)) {
 		dprintk("%s: cannot create RPC transport. Error = %ld\n",
 				__FUNCTION__, PTR_ERR(xprt));
@@ -1674,7 +1693,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	struct rpc_clnt *clnt = NULL;
 	struct rpc_timeout timeparms;
 	rpc_authflavor_t authflavour;
-	int proto, err = -EIO;
+	int err = -EIO;
 
 	sb->s_blocksize_bits = 0;
 	sb->s_blocksize = 0;
@@ -1692,30 +1711,8 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 	server->acdirmax = data->acdirmax*HZ;
 
 	server->rpc_ops = &nfs_v4_clientops;
-	/* Initialize timeout values */
-
-	timeparms.to_initval = data->timeo * HZ / 10;
-	timeparms.to_retries = data->retrans;
-	timeparms.to_exponential = 1;
-	if (!timeparms.to_retries)
-		timeparms.to_retries = 5;
 
-	proto = data->proto;
-	/* Which IP protocol do we use? */
-	switch (proto) {
-	case IPPROTO_TCP:
-		timeparms.to_maxval  = RPC_MAX_TCP_TIMEOUT;
-		if (!timeparms.to_initval)
-			timeparms.to_initval = 600 * HZ / 10;
-		break;
-	case IPPROTO_UDP:
-		timeparms.to_maxval  = RPC_MAX_UDP_TIMEOUT;
-		if (!timeparms.to_initval)
-			timeparms.to_initval = 11 * HZ / 10;
-		break;
-	default:
-		return -EINVAL;
-	}
+	nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
 	clp = nfs4_get_client(&server->addr.sin_addr);
 	if (!clp) {
@@ -1740,7 +1737,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
 	down_write(&clp->cl_sem);
 	if (IS_ERR(clp->cl_rpcclient)) {
-		xprt = xprt_create_proto(proto, &server->addr, &timeparms);
+		xprt = xprt_create_proto(data->proto, &server->addr, &timeparms);
 		if (IS_ERR(xprt)) {
 			up_write(&clp->cl_sem);
 			err = PTR_ERR(xprt);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b28ea0cc0cb7..0e4ffdaa0129 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1453,7 +1453,7 @@ xprt_default_timeout(struct rpc_timeout *to, int proto)
 	if (proto == IPPROTO_UDP)
 		xprt_set_timeout(to, 5,  5 * HZ);
 	else
-		xprt_set_timeout(to, 5, 60 * HZ);
+		xprt_set_timeout(to, 2, 60 * HZ);
 }
 
 /*
@@ -1464,7 +1464,7 @@ xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
 {
 	to->to_initval   = 
 	to->to_increment = incr;
-	to->to_maxval    = incr * retr;
+	to->to_maxval    = to->to_initval + (incr * retr);
 	to->to_retries   = retr;
 	to->to_exponential = 0;
 }
-- 
cgit v1.2.3-58-ga151


From 43118c29dea2b23798bd42a147015cceee7fa885 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Thu, 25 Aug 2005 16:25:49 -0700
Subject: [PATCH] RPC: get rid of xprt->stream

 Now we can fix up the last few places that use the "xprt->stream"
 variable, and get rid of it from the rpc_xprt structure.

 Test-plan:
 Destructive testing (unplugging the network temporarily).  Connectathon
 with UDP and TCP.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c             |  3 +--
 include/linux/sunrpc/xprt.h |  3 +--
 net/sunrpc/xprt.c           |  3 +--
 net/sunrpc/xprtsock.c       | 28 ++++++++++++++++++----------
 4 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82c77df81c5f..7901f5b8092c 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -173,11 +173,10 @@ nlm_bind_host(struct nlm_host *host)
 
 	/* If we've already created an RPC client, check whether
 	 * RPC rebind is required
-	 * Note: why keep rebinding if we're on a tcp connection?
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
 		xprt = clnt->cl_xprt;
-		if (!xprt->stream && time_after_eq(jiffies, host->h_nextrebind)) {
+		if (time_after_eq(jiffies, host->h_nextrebind)) {
 			clnt->cl_port = 0;
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 			dprintk("lockd: next rebind in %ld jiffies\n",
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 966c456a0f6d..c9477f022efb 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -168,8 +168,7 @@ struct rpc_xprt {
 	unsigned long		state;		/* transport state */
 	unsigned char		shutdown   : 1,	/* being shut down */
 				nocong	   : 1,	/* no congestion control */
-				resvport   : 1, /* use a reserved port */
-				stream     : 1;	/* TCP */
+				resvport   : 1; /* use a reserved port */
 
 	/*
 	 * XID
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 31ef7dc7eed6..43fef7626442 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -630,8 +630,7 @@ void xprt_transmit(struct rpc_task *task)
 	case -ENOTCONN:
 		return;
 	default:
-		if (xprt->stream)
-			xprt_disconnect(xprt);
+		break;
 	}
 	xprt_release_write(xprt, task);
 	return;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index aaf053b1a0c4..5bb6fed3df34 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -356,6 +356,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	default:
 		dprintk("RPC:      sendmsg returned unrecognized error %d\n",
 			-status);
+		xprt_disconnect(xprt);
 		break;
 	}
 
@@ -826,19 +827,17 @@ static void xs_tcp_write_space(struct sock *sk)
 }
 
 /**
- * xs_set_buffer_size - set send and receive limits
+ * xs_udp_set_buffer_size - set send and receive limits
  * @xprt: generic transport
  *
  * Set socket send and receive limits based on the
  * sndsize and rcvsize fields in the generic transport
- * structure. This applies only to UDP sockets.
+ * structure.
  */
-static void xs_set_buffer_size(struct rpc_xprt *xprt)
+static void xs_udp_set_buffer_size(struct rpc_xprt *xprt)
 {
 	struct sock *sk = xprt->inet;
 
-	if (xprt->stream)
-		return;
 	if (xprt->rcvsize) {
 		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
 		sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs *  2;
@@ -850,6 +849,17 @@ static void xs_set_buffer_size(struct rpc_xprt *xprt)
 	}
 }
 
+/**
+ * xs_tcp_set_buffer_size - set send and receive limits
+ * @xprt: generic transport
+ *
+ * Nothing to do for TCP.
+ */
+static void xs_tcp_set_buffer_size(struct rpc_xprt *xprt)
+{
+	return;
+}
+
 static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sockaddr_in myaddr = {
@@ -928,7 +938,7 @@ static void xs_udp_connect_worker(void *args)
 
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
-	xs_set_buffer_size(xprt);
+	xs_udp_set_buffer_size(xprt);
 	status = 0;
 out:
 	xprt_wake_pending_tasks(xprt, status);
@@ -1034,7 +1044,7 @@ static void xs_connect(struct rpc_task *task)
 }
 
 static struct rpc_xprt_ops xs_udp_ops = {
-	.set_buffer_size	= xs_set_buffer_size,
+	.set_buffer_size	= xs_udp_set_buffer_size,
 	.connect		= xs_connect,
 	.send_request		= xs_udp_send_request,
 	.close			= xs_close,
@@ -1042,7 +1052,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
-	.set_buffer_size	= xs_set_buffer_size,
+	.set_buffer_size	= xs_tcp_set_buffer_size,
 	.connect		= xs_connect,
 	.send_request		= xs_tcp_send_request,
 	.close			= xs_close,
@@ -1074,7 +1084,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->prot = IPPROTO_UDP;
 	xprt->port = XS_MAX_RESVPORT;
 	xprt->tsh_size = 0;
-	xprt->stream = 0;
 	xprt->nocong = 0;
 	xprt->cwnd = RPC_INITCWND;
 	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
@@ -1115,7 +1124,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->prot = IPPROTO_TCP;
 	xprt->port = XS_MAX_RESVPORT;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
-	xprt->stream = 1;
 	xprt->nocong = 1;
 	xprt->cwnd = RPC_MAXCWND(xprt);
 	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
-- 
cgit v1.2.3-58-ga151


From ed63c003701a314c4893c11eceb9d68f8f46c662 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Thu, 25 Aug 2005 16:25:53 -0700
Subject: [PATCH] RPC: remove xprt->nocong

 Get rid of the "xprt->nocong" variable.

 Test-plan:
 Use WAN simulation to cause sporadic bursty packet loss with UDP mounts.
 Look for significant regression in performance or client stability.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c             | 1 -
 include/linux/sunrpc/xprt.h | 1 -
 net/sunrpc/xprtsock.c       | 2 --
 3 files changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 7901f5b8092c..c4c8601096e0 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -188,7 +188,6 @@ nlm_bind_host(struct nlm_host *host)
 			goto forgetit;
 
 		xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout);
-		xprt->nocong = 1;	/* No congestion control for NLM */
 		xprt->resvport = 1;	/* NLM requires a reserved port */
 
 		/* Existing NLM servers accept AUTH_UNIX only */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 64e77658fa30..559fb471f6f2 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -172,7 +172,6 @@ struct rpc_xprt {
 	unsigned int		max_reqs;	/* total slots */
 	unsigned long		state;		/* transport state */
 	unsigned char		shutdown   : 1,	/* being shut down */
-				nocong	   : 1,	/* no congestion control */
 				resvport   : 1; /* use a reserved port */
 
 	/*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6c2f5dcea416..7e5e020fe78d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1100,7 +1100,6 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->prot = IPPROTO_UDP;
 	xprt->port = XS_MAX_RESVPORT;
 	xprt->tsh_size = 0;
-	xprt->nocong = 0;
 	xprt->cwnd = RPC_INITCWND;
 	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
@@ -1140,7 +1139,6 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->prot = IPPROTO_TCP;
 	xprt->port = XS_MAX_RESVPORT;
 	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
-	xprt->nocong = 1;
 	xprt->cwnd = RPC_MAXCWND(xprt);
 	xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
-- 
cgit v1.2.3-58-ga151


From 03bf4b707eee06706c9db343dd5c905b7ee47ed2 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Thu, 25 Aug 2005 16:25:55 -0700
Subject: [PATCH] RPC: parametrize various transport connect timeouts

 Each transport implementation can now set unique bind, connect,
 reestablishment, and idle timeout values.  These are variables,
 allowing the values to be modified dynamically.  This permits
 exponential backoff of any of these values, for instance.

 As an example, we implement exponential backoff for the connection
 reestablishment timeout.

 Test-plan:
 Destructive testing (unplugging the network temporarily).  Connectathon
 with UDP and TCP.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c              | 10 +++----
 include/linux/nfs_fs.h      |  4 +++
 include/linux/sunrpc/xprt.h | 29 ++++---------------
 net/sunrpc/clnt.c           |  2 +-
 net/sunrpc/xprt.c           |  5 ++--
 net/sunrpc/xprtsock.c       | 68 +++++++++++++++++++++++++++++++++++++++++++--
 6 files changed, 84 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b6a1ca508e60..062911e7ceb5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -369,8 +369,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned
 	case IPPROTO_TCP:
 		if (!to->to_initval)
 			to->to_initval = 60 * HZ;
-		if (to->to_initval > RPC_MAX_TCP_TIMEOUT)
-			to->to_initval = RPC_MAX_TCP_TIMEOUT;
+		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
+			to->to_initval = NFS_MAX_TCP_TIMEOUT;
 		to->to_increment = to->to_initval;
 		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
 		to->to_exponential = 0;
@@ -379,9 +379,9 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, unsigned
 	default:
 		if (!to->to_initval)
 			to->to_initval = 11 * HZ / 10;
-		if (to->to_initval > RPC_MAX_UDP_TIMEOUT)
-			to->to_initval = RPC_MAX_UDP_TIMEOUT;
-		to->to_maxval = RPC_MAX_UDP_TIMEOUT;
+		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
+			to->to_initval = NFS_MAX_UDP_TIMEOUT;
+		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
 		to->to_exponential = 1;
 		break;
 	}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9a6047ff1b25..7bac2785c6e4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -41,6 +41,10 @@
 #define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
 #define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
 
+/* Default timeout values */
+#define NFS_MAX_UDP_TIMEOUT	(60*HZ)
+#define NFS_MAX_TCP_TIMEOUT	(600*HZ)
+
 /*
  * superblock magic number for NFS
  */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 9d9266cf8a36..2543adf18551 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -22,28 +22,6 @@ extern unsigned int xprt_tcp_slot_table_entries;
 #define RPC_DEF_SLOT_TABLE	(16U)
 #define RPC_MAX_SLOT_TABLE	(128U)
 
-/* Default timeout values */
-#define RPC_MAX_UDP_TIMEOUT	(60*HZ)
-#define RPC_MAX_TCP_TIMEOUT	(600*HZ)
-
-/*
- * Wait duration for an RPC TCP connection to be established.  Solaris
- * NFS over TCP uses 60 seconds, for example, which is in line with how
- * long a server takes to reboot.
- */
-#define RPC_CONNECT_TIMEOUT	(60*HZ)
-
-/*
- * Delay an arbitrary number of seconds before attempting to reconnect
- * after an error.
- */
-#define RPC_REESTABLISH_TIMEOUT	(15*HZ)
-
-/*
- * RPC transport idle timeout.
- */
-#define RPC_IDLE_DISCONNECT_TIMEOUT	(5*60*HZ)
-
 /*
  * RPC call and reply header size as number of 32bit words (verifier
  * size computed separately)
@@ -182,14 +160,19 @@ struct rpc_xprt {
 	/*
 	 * Connection of transports
 	 */
+	unsigned long		connect_timeout,
+				bind_timeout,
+				reestablish_timeout;
 	struct work_struct	connect_worker;
 	unsigned short		port;
+
 	/*
 	 * Disconnection of idle transports
 	 */
 	struct work_struct	task_cleanup;
 	struct timer_list	timer;
-	unsigned long		last_used;
+	unsigned long		last_used,
+				idle_timeout;
 
 	/*
 	 * Send stuff
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cc1b773a79d3..24b44e73f391 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -740,7 +740,7 @@ call_bind(struct rpc_task *task)
 	task->tk_action = call_connect;
 	if (!clnt->cl_port) {
 		task->tk_action = call_bind_status;
-		task->tk_timeout = RPC_CONNECT_TIMEOUT;
+		task->tk_timeout = task->tk_xprt->bind_timeout;
 		rpc_getport(task, clnt);
 	}
 }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 0458319a1bdd..215be0d0ef6b 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -551,7 +551,7 @@ void xprt_connect(struct rpc_task *task)
 		if (task->tk_rqstp)
 			task->tk_rqstp->rq_bytes_sent = 0;
 
-		task->tk_timeout = RPC_CONNECT_TIMEOUT;
+		task->tk_timeout = xprt->connect_timeout;
 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
 		xprt->ops->connect(task);
 	}
@@ -763,7 +763,6 @@ void xprt_transmit(struct rpc_task *task)
 
 	switch (status) {
 	case -ECONNREFUSED:
-		task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
 		rpc_sleep_on(&xprt->sending, task, NULL, NULL);
 	case -EAGAIN:
 	case -ENOTCONN:
@@ -857,7 +856,7 @@ void xprt_release(struct rpc_task *task)
 	xprt->last_used = jiffies;
 	if (list_empty(&xprt->recv) && !xprt->shutdown)
 		mod_timer(&xprt->timer,
-				xprt->last_used + RPC_IDLE_DISCONNECT_TIMEOUT);
+				xprt->last_used + xprt->idle_timeout);
 	spin_unlock_bh(&xprt->transport_lock);
 	task->tk_rqstp = NULL;
 	memset(req, 0, sizeof(*req));	/* mark unused */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 88ac71fcd335..06c2d95484e0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -41,6 +41,50 @@
  */
 #define XS_SENDMSG_RETRY	(10U)
 
+/*
+ * Time out for an RPC UDP socket connect.  UDP socket connects are
+ * synchronous, but we set a timeout anyway in case of resource
+ * exhaustion on the local host.
+ */
+#define XS_UDP_CONN_TO		(5U * HZ)
+
+/*
+ * Wait duration for an RPC TCP connection to be established.  Solaris
+ * NFS over TCP uses 60 seconds, for example, which is in line with how
+ * long a server takes to reboot.
+ */
+#define XS_TCP_CONN_TO		(60U * HZ)
+
+/*
+ * Wait duration for a reply from the RPC portmapper.
+ */
+#define XS_BIND_TO		(60U * HZ)
+
+/*
+ * Delay if a UDP socket connect error occurs.  This is most likely some
+ * kind of resource problem on the local host.
+ */
+#define XS_UDP_REEST_TO		(2U * HZ)
+
+/*
+ * The reestablish timeout allows clients to delay for a bit before attempting
+ * to reconnect to a server that just dropped our connection.
+ *
+ * We implement an exponential backoff when trying to reestablish a TCP
+ * transport connection with the server.  Some servers like to drop a TCP
+ * connection when they are overworked, so we start with a short timeout and
+ * increase over time if the server is down or not responding.
+ */
+#define XS_TCP_INIT_REEST_TO	(3U * HZ)
+#define XS_TCP_MAX_REEST_TO	(5U * 60 * HZ)
+
+/*
+ * TCP idle timeout; client drops the transport socket if it is idle
+ * for this long.  Note that we also timeout UDP sockets to prevent
+ * holding port numbers when there is no RPC traffic.
+ */
+#define XS_IDLE_DISC_TO		(5U * 60 * HZ)
+
 #ifdef RPC_DEBUG
 # undef  RPC_DEBUG_DATA
 # define RPCDBG_FACILITY	RPCDBG_TRANS
@@ -739,6 +783,7 @@ static void xs_tcp_state_change(struct sock *sk)
 			xprt->tcp_reclen = 0;
 			xprt->tcp_copied = 0;
 			xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
+			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
 			xprt_wake_pending_tasks(xprt, 0);
 		}
 		spin_unlock_bh(&xprt->transport_lock);
@@ -1066,6 +1111,13 @@ out_clear:
  * @task: address of RPC task that manages state of connect request
  *
  * TCP: If the remote end dropped the connection, delay reconnecting.
+ *
+ * UDP socket connects are synchronous, but we use a work queue anyway
+ * to guarantee that even unprivileged user processes can set up a
+ * socket on a privileged port.
+ *
+ * If a UDP socket connect fails, the delay behavior here prevents
+ * retry floods (hard mounts).
  */
 static void xs_connect(struct rpc_task *task)
 {
@@ -1075,9 +1127,13 @@ static void xs_connect(struct rpc_task *task)
 		return;
 
 	if (xprt->sock != NULL) {
-		dprintk("RPC:      xs_connect delayed xprt %p\n", xprt);
+		dprintk("RPC:      xs_connect delayed xprt %p for %lu seconds\n",
+				xprt, xprt->reestablish_timeout / HZ);
 		schedule_delayed_work(&xprt->connect_worker,
-					RPC_REESTABLISH_TIMEOUT);
+					xprt->reestablish_timeout);
+		xprt->reestablish_timeout <<= 1;
+		if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
+			xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
 	} else {
 		dprintk("RPC:      xs_connect scheduled xprt %p\n", xprt);
 		schedule_work(&xprt->connect_worker);
@@ -1139,6 +1195,10 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
 	INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt);
+	xprt->bind_timeout = XS_BIND_TO;
+	xprt->connect_timeout = XS_UDP_CONN_TO;
+	xprt->reestablish_timeout = XS_UDP_REEST_TO;
+	xprt->idle_timeout = XS_IDLE_DISC_TO;
 
 	xprt->ops = &xs_udp_ops;
 
@@ -1176,6 +1236,10 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt);
+	xprt->bind_timeout = XS_BIND_TO;
+	xprt->connect_timeout = XS_TCP_CONN_TO;
+	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+	xprt->idle_timeout = XS_IDLE_DISC_TO;
 
 	xprt->ops = &xs_tcp_ops;
 
-- 
cgit v1.2.3-58-ga151


From 278c995c8a153bb2a9bc427e931cfb9c8034c9d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Sun, 24 Jul 2005 23:53:01 +0100
Subject: [PATCH] RPC,NFS: new rpc_pipefs patch

 Currently rpc_mkdir/rpc_rmdir and rpc_mkpipe/mk_unlink have an API that's
 a little unfortunate.  They take a path relative to the rpc_pipefs root and
 thus need to perform a full lookup.  If you look at debugfs or usbfs they
 always store the dentry for directories they created and thus can pass in
 a dentry + single pathname component pair into their equivalents of the
 above functions.

 And in fact rpc_pipefs actually stores a dentry for all but one component so
 this change not only simplifies the core rpc_pipe code but also the callers.

 Unfortuntately this code path is only used by the NFS4 idmapper and
 AUTH_GSSAPI for which I don't have a test enviroment.  Could someone give
 it a spin?  It's the last bit needed before we can rework the
 lookup_hash API

 Signed-off-by: Christoph Hellwig <hch@lst.de>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c                     |  10 +-
 include/linux/sunrpc/clnt.h        |   2 +-
 include/linux/sunrpc/rpc_pipe_fs.h |   9 +-
 net/sunrpc/auth_gss/auth_gss.c     |   9 +-
 net/sunrpc/clnt.c                  |  53 +++++---
 net/sunrpc/rpc_pipe.c              | 268 +++++++++++++------------------------
 6 files changed, 142 insertions(+), 209 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ffb8df91dc34..1d0a5bf0d264 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -66,7 +66,6 @@ struct idmap_hashtable {
 };
 
 struct idmap {
-	char                  idmap_path[48];
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
@@ -102,11 +101,8 @@ nfs_idmap_new(struct nfs4_client *clp)
 
 	memset(idmap, 0, sizeof(*idmap));
 
-	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
-	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
-
-        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
-	    idmap, &idmap_upcall_ops, 0);
+	idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry,
+			"idmap", idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
 		kfree(idmap);
 		return;
@@ -128,7 +124,7 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
-	rpc_unlink(idmap->idmap_path);
+	rpc_unlink(idmap->idmap_dentry);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ab151bbb66df..b5b51c196690 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -59,7 +59,7 @@ struct rpc_clnt {
 
 	int			cl_nodelen;	/* nodename length */
 	char 			cl_nodename[UNX_MAXNODENAME];
-	char			cl_pathname[30];/* Path in rpc_pipe_fs */
+	struct dentry *		__cl_parent_dentry;
 	struct dentry *		cl_dentry;	/* inode */
 	struct rpc_clnt *	cl_parent;	/* Points to parent of clones */
 	struct rpc_rtt		cl_rtt_default;
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 63929349571f..63878d05c9a9 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -41,10 +41,11 @@ RPC_I(struct inode *inode)
 
 extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
-extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
-extern int rpc_rmdir(char *);
-extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
-extern int rpc_unlink(char *);
+extern struct dentry *rpc_mkdir(struct dentry *, char *, struct rpc_clnt *);
+extern void rpc_rmdir(struct dentry *);
+extern struct dentry *rpc_mkpipe(struct dentry *, char *, void *,
+		struct rpc_pipe_ops *, int flags);
+extern void rpc_unlink(struct dentry *);
 
 #endif
 #endif
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index d2b08f16c257..bd2555139fa9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -87,7 +87,6 @@ struct gss_auth {
 	struct list_head upcalls;
 	struct rpc_clnt *client;
 	struct dentry *dentry;
-	char path[48];
 	spinlock_t lock;
 };
 
@@ -690,10 +689,8 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 	if (err)
 		goto err_put_mech;
 
-	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
-			clnt->cl_pathname,
-			gss_auth->mech->gm_name);
-	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+			clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
 	if (IS_ERR(gss_auth->dentry)) {
 		err = PTR_ERR(gss_auth->dentry);
 		goto err_put_mech;
@@ -718,7 +715,7 @@ gss_destroy(struct rpc_auth *auth)
 		auth, auth->au_flavor);
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	rpc_unlink(gss_auth->path);
+	rpc_unlink(gss_auth->dentry);
 	gss_mech_put(gss_auth->mech);
 
 	rpcauth_free_credcache(auth);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5a8f01d726e9..63bf591310e0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -67,26 +67,42 @@ static u32 *	call_verify(struct rpc_task *task);
 static int
 rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
 {
-	static uint32_t clntid;
+	static unsigned int clntid;
+	char name[128];
 	int error;
 
 	if (dir_name == NULL)
 		return 0;
-	for (;;) {
-		snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
-				"%s/clnt%x", dir_name,
-				(unsigned int)clntid++);
-		clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0';
-		clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt);
-		if (!IS_ERR(clnt->cl_dentry))
-			return 0;
+
+ retry_parent:
+	clnt->__cl_parent_dentry = rpc_mkdir(NULL, dir_name, NULL);
+	if (IS_ERR(clnt->__cl_parent_dentry)) {
+		error = PTR_ERR(clnt->__cl_parent_dentry);
+		if (error == -EEXIST)
+			goto retry_parent; /* XXX(hch): WTF? */
+	
+		printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
+				dir_name, error);
+		return error;
+	}
+
+
+ retry_child:
+	snprintf(name, sizeof(name), "clnt%x", clntid++);
+	name[sizeof(name) - 1] = '\0';
+
+	clnt->cl_dentry = rpc_mkdir(clnt->__cl_parent_dentry, name, clnt);
+	if (IS_ERR(clnt->cl_dentry)) {
 		error = PTR_ERR(clnt->cl_dentry);
-		if (error != -EEXIST) {
-			printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
-					clnt->cl_pathname, error);
-			return error;
-		}
+		if (error == -EEXIST)
+			goto retry_child;
+		printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
+				name, error);
+		rpc_rmdir(clnt->__cl_parent_dentry);
+		return error;
 	}
+
+	return 0;
 }
 
 /*
@@ -174,7 +190,8 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 	return clnt;
 
 out_no_auth:
-	rpc_rmdir(clnt->cl_pathname);
+	rpc_rmdir(clnt->cl_dentry);
+	rpc_rmdir(clnt->__cl_parent_dentry);
 out_no_path:
 	if (clnt->cl_server != clnt->cl_inline_name)
 		kfree(clnt->cl_server);
@@ -302,8 +319,10 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (clnt->cl_pathname[0])
-		rpc_rmdir(clnt->cl_pathname);
+	if (clnt->cl_dentry)
+		rpc_rmdir(clnt->cl_dentry);
+	if (clnt->__cl_parent_dentry)
+		rpc_rmdir(clnt->__cl_parent_dentry);
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index ded6c63f11ec..b382809726d8 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -414,38 +414,6 @@ rpc_put_mount(void)
 	simple_release_fs(&rpc_mount, &rpc_mount_count);
 }
 
-static int
-rpc_lookup_parent(char *path, struct nameidata *nd)
-{
-	if (path[0] == '\0')
-		return -ENOENT;
-	if (rpc_get_mount()) {
-		printk(KERN_WARNING "%s: %s failed to mount "
-			       "pseudofilesystem \n", __FILE__, __FUNCTION__);
-		return -ENODEV;
-	}
-	nd->mnt = mntget(rpc_mount);
-	nd->dentry = dget(rpc_mount->mnt_root);
-	nd->last_type = LAST_ROOT;
-	nd->flags = LOOKUP_PARENT;
-	nd->depth = 0;
-
-	if (path_walk(path, nd)) {
-		printk(KERN_WARNING "%s: %s failed to find path %s\n",
-				__FILE__, __FUNCTION__, path);
-		rpc_put_mount();
-		return -ENOENT;
-	}
-	return 0;
-}
-
-static void
-rpc_release_path(struct nameidata *nd)
-{
-	path_release(nd);
-	rpc_put_mount();
-}
-
 static struct inode *
 rpc_get_inode(struct super_block *sb, int mode)
 {
@@ -550,197 +518,149 @@ out_bad:
 	return -ENOMEM;
 }
 
-static int
-__rpc_mkdir(struct inode *dir, struct dentry *dentry)
+struct dentry *
+rpc_mkdir(struct dentry *parent, char *name, struct rpc_clnt *rpc_client)
 {
+	struct inode *dir;
+	struct dentry *dentry;
 	struct inode *inode;
-
-	inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
-	if (!inode)
-		goto out_err;
-	inode->i_ino = iunique(dir->i_sb, 100);
-	d_instantiate(dentry, inode);
-	dir->i_nlink++;
-	inode_dir_notify(dir, DN_CREATE);
-	rpc_get_mount();
-	return 0;
-out_err:
-	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
-			__FILE__, __FUNCTION__, dentry->d_name.name);
-	return -ENOMEM;
-}
-
-static int
-__rpc_rmdir(struct inode *dir, struct dentry *dentry)
-{
 	int error;
 
-	shrink_dcache_parent(dentry);
-	if (dentry->d_inode) {
-		rpc_close_pipes(dentry->d_inode);
-		rpc_inode_setowner(dentry->d_inode, NULL);
-	}
-	if ((error = simple_rmdir(dir, dentry)) != 0)
-		return error;
-	if (!error) {
-		inode_dir_notify(dir, DN_DELETE);
-		d_drop(dentry);
-		rpc_put_mount();
-	}
-	return 0;
-}
+	if (!parent)
+		parent = rpc_mount->mnt_root;
 
-static struct dentry *
-rpc_lookup_negative(char *path, struct nameidata *nd)
-{
-	struct dentry *dentry;
-	struct inode *dir;
-	int error;
-
-	if ((error = rpc_lookup_parent(path, nd)) != 0)
+	dir = parent->d_inode;
+	
+	error = rpc_get_mount();
+	if (error)
 		return ERR_PTR(error);
-	dir = nd->dentry->d_inode;
+
 	down(&dir->i_sem);
-	dentry = lookup_hash(&nd->last, nd->dentry);
+	dentry = lookup_one_len(name, parent, strlen(name));
 	if (IS_ERR(dentry))
-		goto out_err;
+		goto out_unlock;
 	if (dentry->d_inode) {
-		dput(dentry);
 		dentry = ERR_PTR(-EEXIST);
-		goto out_err;
+		goto out_dput;
 	}
-	return dentry;
-out_err:
-	up(&dir->i_sem);
-	rpc_release_path(nd);
-	return dentry;
-}
 
+	inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
+	if (!inode)
+		goto out_dput;
+	inode->i_ino = iunique(dir->i_sb, 100);
+	dir->i_nlink++;
+	RPC_I(dentry->d_inode)->private = rpc_client;
 
-struct dentry *
-rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
-{
-	struct nameidata nd;
-	struct dentry *dentry;
-	struct inode *dir;
-	int error;
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	up(&dir->i_sem);
+
+	inode_dir_notify(dir, DN_CREATE);
 
-	dentry = rpc_lookup_negative(path, &nd);
-	if (IS_ERR(dentry))
-		return dentry;
-	dir = nd.dentry->d_inode;
-	if ((error = __rpc_mkdir(dir, dentry)) != 0)
-		goto err_dput;
-	RPC_I(dentry->d_inode)->private = rpc_client;
 	error = rpc_populate(dentry, authfiles,
 			RPCAUTH_info, RPCAUTH_EOF);
 	if (error)
-		goto err_depopulate;
-out:
-	up(&dir->i_sem);
-	rpc_release_path(&nd);
+		goto out_depopulate;
+
 	return dentry;
-err_depopulate:
-	rpc_depopulate(dentry);
-	__rpc_rmdir(dir, dentry);
-err_dput:
+
+ out_depopulate:
+	rpc_rmdir(dentry);
+ out_dput:
 	dput(dentry);
-	printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n",
-			__FILE__, __FUNCTION__, path, error);
-	dentry = ERR_PTR(error);
-	goto out;
+ out_unlock:
+	up(&dir->i_sem);
+	rpc_put_mount();
+	return dentry;
 }
 
-int
-rpc_rmdir(char *path)
+void
+rpc_rmdir(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
-	struct inode *dir;
-	int error;
+	struct dentry *parent = dentry->d_parent;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
-	down(&dir->i_sem);
-	dentry = lookup_hash(&nd.last, nd.dentry);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
 	rpc_depopulate(dentry);
-	error = __rpc_rmdir(dir, dentry);
-	dput(dentry);
-out_release:
-	up(&dir->i_sem);
-	rpc_release_path(&nd);
-	return error;
+
+	down(&parent->d_inode->i_sem);
+	if (dentry->d_inode) {
+		rpc_close_pipes(dentry->d_inode);
+		rpc_inode_setowner(dentry->d_inode, NULL);
+		simple_rmdir(parent->d_inode, dentry);
+	}
+	up(&parent->d_inode->i_sem);
+
+	inode_dir_notify(parent->d_inode, DN_DELETE);
+	rpc_put_mount();
 }
 
 struct dentry *
-rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
+rpc_mkpipe(struct dentry *parent, char *name, void *private,
+	   struct rpc_pipe_ops *ops, int flags)
 {
-	struct nameidata nd;
+	struct inode *dir = parent->d_inode;
 	struct dentry *dentry;
-	struct inode *dir, *inode;
+	struct inode *inode;
 	struct rpc_inode *rpci;
+	int error;
+
+	error = rpc_get_mount();
+	if (error)
+		return ERR_PTR(error);
 
-	dentry = rpc_lookup_negative(path, &nd);
+	down(&parent->d_inode->i_sem);
+	dentry = lookup_one_len(name, parent, strlen(name));
 	if (IS_ERR(dentry))
-		return dentry;
-	dir = nd.dentry->d_inode;
-	inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
-	if (!inode)
-		goto err_dput;
+		goto out_unlock;
+	if (dentry->d_inode) {
+		dentry = ERR_PTR(-EEXIST);
+		goto out_dput;
+	}
+
+	inode = rpc_get_inode(parent->d_inode->i_sb,
+			S_IFSOCK | S_IRUSR | S_IWUSR);
+	if (!inode) {
+		dentry = ERR_PTR(-ENOMEM);
+		goto out_dput;
+	}
+
 	inode->i_ino = iunique(dir->i_sb, 100);
 	inode->i_fop = &rpc_pipe_fops;
-	d_instantiate(dentry, inode);
+
 	rpci = RPC_I(inode);
 	rpci->private = private;
 	rpci->flags = flags;
 	rpci->ops = ops;
+
+	d_instantiate(dentry, inode);
+	dget(dentry);
+	up(&parent->d_inode->i_sem);
+
 	inode_dir_notify(dir, DN_CREATE);
-out:
-	up(&dir->i_sem);
-	rpc_release_path(&nd);
 	return dentry;
-err_dput:
+
+ out_dput:
 	dput(dentry);
-	dentry = ERR_PTR(-ENOMEM);
-	printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
-			__FILE__, __FUNCTION__, path, -ENOMEM);
-	goto out;
+ out_unlock:
+	up(&parent->d_inode->i_sem);
+	rpc_put_mount();
+	return dentry;
 }
 
-int
-rpc_unlink(char *path)
+void
+rpc_unlink(struct dentry *dentry)
 {
-	struct nameidata nd;
-	struct dentry *dentry;
-	struct inode *dir;
-	int error;
+	struct dentry *parent = dentry->d_parent;
 
-	if ((error = rpc_lookup_parent(path, &nd)) != 0)
-		return error;
-	dir = nd.dentry->d_inode;
-	down(&dir->i_sem);
-	dentry = lookup_hash(&nd.last, nd.dentry);
-	if (IS_ERR(dentry)) {
-		error = PTR_ERR(dentry);
-		goto out_release;
-	}
-	d_drop(dentry);
+	down(&parent->d_inode->i_sem);
 	if (dentry->d_inode) {
 		rpc_close_pipes(dentry->d_inode);
 		rpc_inode_setowner(dentry->d_inode, NULL);
-		error = simple_unlink(dir, dentry);
+		simple_unlink(parent->d_inode, dentry);
 	}
-	dput(dentry);
-	inode_dir_notify(dir, DN_DELETE);
-out_release:
-	up(&dir->i_sem);
-	rpc_release_path(&nd);
-	return error;
+	up(&parent->d_inode->i_sem);
+
+	inode_dir_notify(parent->d_inode, DN_DELETE);
+	rpc_put_mount();
 }
 
 /*
-- 
cgit v1.2.3-58-ga151


From 3063d8a16643190f9e12e9c7e9f1ca56f7e7934e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Aug 2005 16:25:57 -0700
Subject: NFS: Make /proc/mounts display the protocol used by NFSv4

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 062911e7ceb5..358d8ef8c087 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -595,7 +595,6 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
 		{ NFS_MOUNT_INTR, ",intr", "" },
 		{ NFS_MOUNT_POSIX, ",posix", "" },
-		{ NFS_MOUNT_TCP, ",tcp", ",udp" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", ",lock" },
@@ -604,6 +603,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	};
 	struct proc_nfs_info *nfs_infop;
 	struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+	char buf[12];
+	char *proto;
 
 	seq_printf(m, ",v%d", nfss->rpc_ops->version);
 	seq_printf(m, ",rsize=%d", nfss->rsize);
@@ -622,6 +623,18 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		else
 			seq_puts(m, nfs_infop->nostr);
 	}
+	switch (nfss->client->cl_xprt->prot) {
+		case IPPROTO_TCP:
+			proto = "tcp";
+			break;
+		case IPPROTO_UDP:
+			proto = "udp";
+			break;
+		default:
+			snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot);
+			proto = buf;
+	}
+	seq_printf(m, ",proto=%s", proto);
 	seq_puts(m, ",addr=");
 	seq_escape(m, nfss->hostname, " \t\n\\");
 	return 0;
-- 
cgit v1.2.3-58-ga151


From f134585a7343d71f9be7f0cf97e2145f21dd10c6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 23 Sep 2005 11:08:25 -0400
Subject: Revert "[PATCH] RPC,NFS: new rpc_pipefs patch"

This reverts 17f4e6febca160a9f9dd4bdece9784577a2f4524 commit.
---
 fs/nfs/idmap.c                     |  10 +-
 include/linux/sunrpc/clnt.h        |   2 +-
 include/linux/sunrpc/rpc_pipe_fs.h |   9 +-
 net/sunrpc/auth_gss/auth_gss.c     |   9 +-
 net/sunrpc/clnt.c                  |  53 +++-----
 net/sunrpc/rpc_pipe.c              | 268 ++++++++++++++++++++++++-------------
 6 files changed, 209 insertions(+), 142 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 1d0a5bf0d264..ffb8df91dc34 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -66,6 +66,7 @@ struct idmap_hashtable {
 };
 
 struct idmap {
+	char                  idmap_path[48];
 	struct dentry        *idmap_dentry;
 	wait_queue_head_t     idmap_wq;
 	struct idmap_msg      idmap_im;
@@ -101,8 +102,11 @@ nfs_idmap_new(struct nfs4_client *clp)
 
 	memset(idmap, 0, sizeof(*idmap));
 
-	idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry,
-			"idmap", idmap, &idmap_upcall_ops, 0);
+	snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
+	    "%s/idmap", clp->cl_rpcclient->cl_pathname);
+
+        idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path,
+	    idmap, &idmap_upcall_ops, 0);
         if (IS_ERR(idmap->idmap_dentry)) {
 		kfree(idmap);
 		return;
@@ -124,7 +128,7 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
 	if (!idmap)
 		return;
-	rpc_unlink(idmap->idmap_dentry);
+	rpc_unlink(idmap->idmap_path);
 	clp->cl_idmap = NULL;
 	kfree(idmap);
 }
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b5b51c196690..ab151bbb66df 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -59,7 +59,7 @@ struct rpc_clnt {
 
 	int			cl_nodelen;	/* nodename length */
 	char 			cl_nodename[UNX_MAXNODENAME];
-	struct dentry *		__cl_parent_dentry;
+	char			cl_pathname[30];/* Path in rpc_pipe_fs */
 	struct dentry *		cl_dentry;	/* inode */
 	struct rpc_clnt *	cl_parent;	/* Points to parent of clones */
 	struct rpc_rtt		cl_rtt_default;
diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index 63878d05c9a9..63929349571f 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -41,11 +41,10 @@ RPC_I(struct inode *inode)
 
 extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *);
 
-extern struct dentry *rpc_mkdir(struct dentry *, char *, struct rpc_clnt *);
-extern void rpc_rmdir(struct dentry *);
-extern struct dentry *rpc_mkpipe(struct dentry *, char *, void *,
-		struct rpc_pipe_ops *, int flags);
-extern void rpc_unlink(struct dentry *);
+extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
+extern int rpc_rmdir(char *);
+extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
+extern int rpc_unlink(char *);
 
 #endif
 #endif
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index bd2555139fa9..d2b08f16c257 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -87,6 +87,7 @@ struct gss_auth {
 	struct list_head upcalls;
 	struct rpc_clnt *client;
 	struct dentry *dentry;
+	char path[48];
 	spinlock_t lock;
 };
 
@@ -689,8 +690,10 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
 	if (err)
 		goto err_put_mech;
 
-	gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
-			clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+	snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s",
+			clnt->cl_pathname,
+			gss_auth->mech->gm_name);
+	gss_auth->dentry = rpc_mkpipe(gss_auth->path, clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
 	if (IS_ERR(gss_auth->dentry)) {
 		err = PTR_ERR(gss_auth->dentry);
 		goto err_put_mech;
@@ -715,7 +718,7 @@ gss_destroy(struct rpc_auth *auth)
 		auth, auth->au_flavor);
 
 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
-	rpc_unlink(gss_auth->dentry);
+	rpc_unlink(gss_auth->path);
 	gss_mech_put(gss_auth->mech);
 
 	rpcauth_free_credcache(auth);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 63bf591310e0..5a8f01d726e9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -67,42 +67,26 @@ static u32 *	call_verify(struct rpc_task *task);
 static int
 rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
 {
-	static unsigned int clntid;
-	char name[128];
+	static uint32_t clntid;
 	int error;
 
 	if (dir_name == NULL)
 		return 0;
-
- retry_parent:
-	clnt->__cl_parent_dentry = rpc_mkdir(NULL, dir_name, NULL);
-	if (IS_ERR(clnt->__cl_parent_dentry)) {
-		error = PTR_ERR(clnt->__cl_parent_dentry);
-		if (error == -EEXIST)
-			goto retry_parent; /* XXX(hch): WTF? */
-	
-		printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
-				dir_name, error);
-		return error;
-	}
-
-
- retry_child:
-	snprintf(name, sizeof(name), "clnt%x", clntid++);
-	name[sizeof(name) - 1] = '\0';
-
-	clnt->cl_dentry = rpc_mkdir(clnt->__cl_parent_dentry, name, clnt);
-	if (IS_ERR(clnt->cl_dentry)) {
+	for (;;) {
+		snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
+				"%s/clnt%x", dir_name,
+				(unsigned int)clntid++);
+		clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0';
+		clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt);
+		if (!IS_ERR(clnt->cl_dentry))
+			return 0;
 		error = PTR_ERR(clnt->cl_dentry);
-		if (error == -EEXIST)
-			goto retry_child;
-		printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
-				name, error);
-		rpc_rmdir(clnt->__cl_parent_dentry);
-		return error;
+		if (error != -EEXIST) {
+			printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
+					clnt->cl_pathname, error);
+			return error;
+		}
 	}
-
-	return 0;
 }
 
 /*
@@ -190,8 +174,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
 	return clnt;
 
 out_no_auth:
-	rpc_rmdir(clnt->cl_dentry);
-	rpc_rmdir(clnt->__cl_parent_dentry);
+	rpc_rmdir(clnt->cl_pathname);
 out_no_path:
 	if (clnt->cl_server != clnt->cl_inline_name)
 		kfree(clnt->cl_server);
@@ -319,10 +302,8 @@ rpc_destroy_client(struct rpc_clnt *clnt)
 		rpc_destroy_client(clnt->cl_parent);
 		goto out_free;
 	}
-	if (clnt->cl_dentry)
-		rpc_rmdir(clnt->cl_dentry);
-	if (clnt->__cl_parent_dentry)
-		rpc_rmdir(clnt->__cl_parent_dentry);
+	if (clnt->cl_pathname[0])
+		rpc_rmdir(clnt->cl_pathname);
 	if (clnt->cl_xprt) {
 		xprt_destroy(clnt->cl_xprt);
 		clnt->cl_xprt = NULL;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index b382809726d8..ded6c63f11ec 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -414,6 +414,38 @@ rpc_put_mount(void)
 	simple_release_fs(&rpc_mount, &rpc_mount_count);
 }
 
+static int
+rpc_lookup_parent(char *path, struct nameidata *nd)
+{
+	if (path[0] == '\0')
+		return -ENOENT;
+	if (rpc_get_mount()) {
+		printk(KERN_WARNING "%s: %s failed to mount "
+			       "pseudofilesystem \n", __FILE__, __FUNCTION__);
+		return -ENODEV;
+	}
+	nd->mnt = mntget(rpc_mount);
+	nd->dentry = dget(rpc_mount->mnt_root);
+	nd->last_type = LAST_ROOT;
+	nd->flags = LOOKUP_PARENT;
+	nd->depth = 0;
+
+	if (path_walk(path, nd)) {
+		printk(KERN_WARNING "%s: %s failed to find path %s\n",
+				__FILE__, __FUNCTION__, path);
+		rpc_put_mount();
+		return -ENOENT;
+	}
+	return 0;
+}
+
+static void
+rpc_release_path(struct nameidata *nd)
+{
+	path_release(nd);
+	rpc_put_mount();
+}
+
 static struct inode *
 rpc_get_inode(struct super_block *sb, int mode)
 {
@@ -518,149 +550,197 @@ out_bad:
 	return -ENOMEM;
 }
 
-struct dentry *
-rpc_mkdir(struct dentry *parent, char *name, struct rpc_clnt *rpc_client)
+static int
+__rpc_mkdir(struct inode *dir, struct dentry *dentry)
 {
-	struct inode *dir;
-	struct dentry *dentry;
 	struct inode *inode;
+
+	inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
+	if (!inode)
+		goto out_err;
+	inode->i_ino = iunique(dir->i_sb, 100);
+	d_instantiate(dentry, inode);
+	dir->i_nlink++;
+	inode_dir_notify(dir, DN_CREATE);
+	rpc_get_mount();
+	return 0;
+out_err:
+	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
+			__FILE__, __FUNCTION__, dentry->d_name.name);
+	return -ENOMEM;
+}
+
+static int
+__rpc_rmdir(struct inode *dir, struct dentry *dentry)
+{
 	int error;
 
-	if (!parent)
-		parent = rpc_mount->mnt_root;
+	shrink_dcache_parent(dentry);
+	if (dentry->d_inode) {
+		rpc_close_pipes(dentry->d_inode);
+		rpc_inode_setowner(dentry->d_inode, NULL);
+	}
+	if ((error = simple_rmdir(dir, dentry)) != 0)
+		return error;
+	if (!error) {
+		inode_dir_notify(dir, DN_DELETE);
+		d_drop(dentry);
+		rpc_put_mount();
+	}
+	return 0;
+}
 
-	dir = parent->d_inode;
-	
-	error = rpc_get_mount();
-	if (error)
-		return ERR_PTR(error);
+static struct dentry *
+rpc_lookup_negative(char *path, struct nameidata *nd)
+{
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
 
+	if ((error = rpc_lookup_parent(path, nd)) != 0)
+		return ERR_PTR(error);
+	dir = nd->dentry->d_inode;
 	down(&dir->i_sem);
-	dentry = lookup_one_len(name, parent, strlen(name));
+	dentry = lookup_hash(&nd->last, nd->dentry);
 	if (IS_ERR(dentry))
-		goto out_unlock;
+		goto out_err;
 	if (dentry->d_inode) {
+		dput(dentry);
 		dentry = ERR_PTR(-EEXIST);
-		goto out_dput;
+		goto out_err;
 	}
-
-	inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUSR | S_IXUSR);
-	if (!inode)
-		goto out_dput;
-	inode->i_ino = iunique(dir->i_sb, 100);
-	dir->i_nlink++;
-	RPC_I(dentry->d_inode)->private = rpc_client;
-
-	d_instantiate(dentry, inode);
-	dget(dentry);
+	return dentry;
+out_err:
 	up(&dir->i_sem);
+	rpc_release_path(nd);
+	return dentry;
+}
 
-	inode_dir_notify(dir, DN_CREATE);
 
+struct dentry *
+rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
+{
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
+
+	dentry = rpc_lookup_negative(path, &nd);
+	if (IS_ERR(dentry))
+		return dentry;
+	dir = nd.dentry->d_inode;
+	if ((error = __rpc_mkdir(dir, dentry)) != 0)
+		goto err_dput;
+	RPC_I(dentry->d_inode)->private = rpc_client;
 	error = rpc_populate(dentry, authfiles,
 			RPCAUTH_info, RPCAUTH_EOF);
 	if (error)
-		goto out_depopulate;
-
-	return dentry;
-
- out_depopulate:
-	rpc_rmdir(dentry);
- out_dput:
-	dput(dentry);
- out_unlock:
+		goto err_depopulate;
+out:
 	up(&dir->i_sem);
-	rpc_put_mount();
+	rpc_release_path(&nd);
 	return dentry;
+err_depopulate:
+	rpc_depopulate(dentry);
+	__rpc_rmdir(dir, dentry);
+err_dput:
+	dput(dentry);
+	printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n",
+			__FILE__, __FUNCTION__, path, error);
+	dentry = ERR_PTR(error);
+	goto out;
 }
 
-void
-rpc_rmdir(struct dentry *dentry)
+int
+rpc_rmdir(char *path)
 {
-	struct dentry *parent = dentry->d_parent;
-
-	rpc_depopulate(dentry);
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
 
-	down(&parent->d_inode->i_sem);
-	if (dentry->d_inode) {
-		rpc_close_pipes(dentry->d_inode);
-		rpc_inode_setowner(dentry->d_inode, NULL);
-		simple_rmdir(parent->d_inode, dentry);
+	if ((error = rpc_lookup_parent(path, &nd)) != 0)
+		return error;
+	dir = nd.dentry->d_inode;
+	down(&dir->i_sem);
+	dentry = lookup_hash(&nd.last, nd.dentry);
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
+		goto out_release;
 	}
-	up(&parent->d_inode->i_sem);
-
-	inode_dir_notify(parent->d_inode, DN_DELETE);
-	rpc_put_mount();
+	rpc_depopulate(dentry);
+	error = __rpc_rmdir(dir, dentry);
+	dput(dentry);
+out_release:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
+	return error;
 }
 
 struct dentry *
-rpc_mkpipe(struct dentry *parent, char *name, void *private,
-	   struct rpc_pipe_ops *ops, int flags)
+rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
 {
-	struct inode *dir = parent->d_inode;
+	struct nameidata nd;
 	struct dentry *dentry;
-	struct inode *inode;
+	struct inode *dir, *inode;
 	struct rpc_inode *rpci;
-	int error;
-
-	error = rpc_get_mount();
-	if (error)
-		return ERR_PTR(error);
 
-	down(&parent->d_inode->i_sem);
-	dentry = lookup_one_len(name, parent, strlen(name));
+	dentry = rpc_lookup_negative(path, &nd);
 	if (IS_ERR(dentry))
-		goto out_unlock;
-	if (dentry->d_inode) {
-		dentry = ERR_PTR(-EEXIST);
-		goto out_dput;
-	}
-
-	inode = rpc_get_inode(parent->d_inode->i_sb,
-			S_IFSOCK | S_IRUSR | S_IWUSR);
-	if (!inode) {
-		dentry = ERR_PTR(-ENOMEM);
-		goto out_dput;
-	}
-
+		return dentry;
+	dir = nd.dentry->d_inode;
+	inode = rpc_get_inode(dir->i_sb, S_IFSOCK | S_IRUSR | S_IWUSR);
+	if (!inode)
+		goto err_dput;
 	inode->i_ino = iunique(dir->i_sb, 100);
 	inode->i_fop = &rpc_pipe_fops;
-
+	d_instantiate(dentry, inode);
 	rpci = RPC_I(inode);
 	rpci->private = private;
 	rpci->flags = flags;
 	rpci->ops = ops;
-
-	d_instantiate(dentry, inode);
-	dget(dentry);
-	up(&parent->d_inode->i_sem);
-
 	inode_dir_notify(dir, DN_CREATE);
+out:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
 	return dentry;
-
- out_dput:
+err_dput:
 	dput(dentry);
- out_unlock:
-	up(&parent->d_inode->i_sem);
-	rpc_put_mount();
-	return dentry;
+	dentry = ERR_PTR(-ENOMEM);
+	printk(KERN_WARNING "%s: %s() failed to create pipe %s (errno = %d)\n",
+			__FILE__, __FUNCTION__, path, -ENOMEM);
+	goto out;
 }
 
-void
-rpc_unlink(struct dentry *dentry)
+int
+rpc_unlink(char *path)
 {
-	struct dentry *parent = dentry->d_parent;
+	struct nameidata nd;
+	struct dentry *dentry;
+	struct inode *dir;
+	int error;
 
-	down(&parent->d_inode->i_sem);
+	if ((error = rpc_lookup_parent(path, &nd)) != 0)
+		return error;
+	dir = nd.dentry->d_inode;
+	down(&dir->i_sem);
+	dentry = lookup_hash(&nd.last, nd.dentry);
+	if (IS_ERR(dentry)) {
+		error = PTR_ERR(dentry);
+		goto out_release;
+	}
+	d_drop(dentry);
 	if (dentry->d_inode) {
 		rpc_close_pipes(dentry->d_inode);
 		rpc_inode_setowner(dentry->d_inode, NULL);
-		simple_unlink(parent->d_inode, dentry);
+		error = simple_unlink(dir, dentry);
 	}
-	up(&parent->d_inode->i_sem);
-
-	inode_dir_notify(parent->d_inode, DN_DELETE);
-	rpc_put_mount();
+	dput(dentry);
+	inode_dir_notify(dir, DN_DELETE);
+out_release:
+	up(&dir->i_sem);
+	rpc_release_path(&nd);
+	return error;
 }
 
 /*
-- 
cgit v1.2.3-58-ga151


From cee54fc944422c44e476736c045a9e8053cb0644 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:12 -0700
Subject: NFSv4: Add functions to order RPC calls

 NFSv4 file state-changing functions such as OPEN, CLOSE, LOCK,... are all
 labelled with "sequence identifiers" in order to prevent the server from
 reordering RPC requests, as this could cause its file state to
 become out of sync with the client.

 Currently the NFS client code enforces this ordering locally using
 semaphores to restrict access to structures until the RPC call is done.
 This, of course, only works with synchronous RPC calls, since the
 user process must first grab the semaphore.
 By dropping semaphores, and instead teaching the RPC engine to hold
 the RPC calls until they are ready to be sent, we can extend this
 process to work nicely with asynchronous RPC calls too.

 This patch adds a new list called "rpc_sequence" that defines the order
 of the RPC calls to be sent. We add one such list for each state_owner.
 When an RPC call is ready to be sent, it checks if it is top of the
 rpc_sequence list. If so, it proceeds. If not, it goes back to sleep,
 and loops until it hits top of the list.
 Once the RPC call has completed, it can then bump the sequence id counter,
 and remove itself from the rpc_sequence list, and then wake up the next
 sleeper.

 Note that the state_owner sequence ids and lock_owner sequence ids are
 all indexed to the same rpc_sequence list, so OPEN, LOCK,... requests
 are all ordered w.r.t. each other.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |  42 ++++++++++++--
 fs/nfs/nfs4proc.c       | 111 +++++++++++++++++++++++-------------
 fs/nfs/nfs4state.c      | 145 +++++++++++++++++++++++++++++++++++++++---------
 fs/nfs/nfs4xdr.c        |  43 +++++++++++---
 include/linux/nfs_xdr.h |  15 ++---
 5 files changed, 273 insertions(+), 83 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ec1a22d7b876..6ac6708484f5 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -92,6 +92,35 @@ struct nfs4_client {
 	unsigned char		cl_id_uniquifier;
 };
 
+/*
+ * struct rpc_sequence ensures that RPC calls are sent in the exact
+ * order that they appear on the list.
+ */
+struct rpc_sequence {
+	struct rpc_wait_queue	wait;	/* RPC call delay queue */
+	spinlock_t lock;		/* Protects the list */
+	struct list_head list;		/* Defines sequence of RPC calls */
+};
+
+#define NFS_SEQID_CONFIRMED 1
+struct nfs_seqid_counter {
+	struct rpc_sequence *sequence;
+	int flags;
+	u32 counter;
+};
+
+struct nfs_seqid {
+	struct list_head list;
+	struct nfs_seqid_counter *sequence;
+	struct rpc_task *task;
+};
+
+static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
+{
+	if (seqid_mutating_err(-status))
+		seqid->flags |= NFS_SEQID_CONFIRMED;
+}
+
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
@@ -106,12 +135,13 @@ struct nfs4_state_owner {
 	struct nfs4_client   *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
 	struct semaphore     so_sema;
-	u32                  so_seqid;   /* protected by so_sema */
 	atomic_t	     so_count;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
 	struct list_head     so_states;
 	struct list_head     so_delegations;
+	struct nfs_seqid_counter so_seqid;
+	struct rpc_sequence  so_sequence;
 };
 
 /*
@@ -132,7 +162,7 @@ struct nfs4_lock_state {
 	fl_owner_t		ls_owner;	/* POSIX lock owner */
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
-	u32			ls_seqid;
+	struct nfs_seqid_counter	ls_seqid;
 	u32			ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
@@ -224,12 +254,16 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
-extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
-extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
 
+extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
+extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
+extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_free_seqid(struct nfs_seqid *seqid);
+
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9701ca8c9428..9ba89e7cdd28 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -218,7 +218,6 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
 	struct nfs_openargs o_arg = {
 		.fh = NFS_FH(inode),
-		.seqid = sp->so_seqid,
 		.id = sp->so_id,
 		.open_flags = state->state,
 		.clientid = server->nfs4_state->cl_clientid,
@@ -245,8 +244,13 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 		}
 		o_arg.u.delegation_type = delegation->type;
 	}
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (o_arg.seqid == NULL)
+		return -ENOMEM;
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	/* Confirm the sequence as being established */
+	nfs_confirm_seqid(&sp->so_seqid, status);
+	nfs_increment_open_seqid(status, o_arg.seqid);
 	if (status == 0) {
 		memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
 		if (o_res.delegation_type != 0) {
@@ -256,6 +260,7 @@ static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *st
 				nfs_async_inode_return_delegation(inode, &o_res.stateid);
 		}
 	}
+	nfs_free_seqid(o_arg.seqid);
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	/* Ensure we update the inode attributes */
 	NFS_CACHEINV(inode);
@@ -307,16 +312,20 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 		goto out;
 	if (state->state == 0)
 		goto out;
-	arg.seqid = sp->so_seqid;
+	arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	status = -ENOMEM;
+	if (arg.seqid == NULL)
+		goto out;
 	arg.open_flags = state->state;
 	memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	nfs_increment_open_seqid(status, arg.seqid);
 	if (status >= 0) {
 		memcpy(state->stateid.data, res.stateid.data,
 				sizeof(state->stateid.data));
 		clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
+	nfs_free_seqid(arg.seqid);
 out:
 	up(&sp->so_sema);
 	dput(parent);
@@ -345,11 +354,11 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 	return err;
 }
 
-static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid)
+static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
 {
 	struct nfs_open_confirmargs arg = {
 		.fh             = fh,
-		.seqid          = sp->so_seqid,
+		.seqid          = seqid,
 		.stateid	= *stateid,
 	};
 	struct nfs_open_confirmres res;
@@ -362,7 +371,9 @@ static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nf
 	int status;
 
 	status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	/* Confirm the sequence as being established */
+	nfs_confirm_seqid(&sp->so_seqid, status);
+	nfs_increment_open_seqid(status, seqid);
 	if (status >= 0)
 		memcpy(stateid, &res.stateid, sizeof(*stateid));
 	return status;
@@ -380,21 +391,21 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, stru
 	int status;
 
 	/* Update sequence id. The caller must serialize! */
-	o_arg->seqid = sp->so_seqid;
 	o_arg->id = sp->so_id;
 	o_arg->clientid = sp->so_client->cl_clientid;
 
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_seqid(status, sp);
+	nfs_increment_open_seqid(status, o_arg->seqid);
 	if (status != 0)
 		goto out;
 	update_changeattr(dir, &o_res->cinfo);
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
-				sp, &o_res->stateid);
+				sp, &o_res->stateid, o_arg->seqid);
 		if (status != 0)
 			goto out;
 	}
+	nfs_confirm_seqid(&sp->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
 		status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 out:
@@ -465,6 +476,10 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 		set_bit(NFS_DELEGATED_STATE, &state->flags);
 		goto out;
 	}
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	status = -ENOMEM;
+	if (o_arg.seqid == NULL)
+		goto out;
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_nodeleg;
@@ -490,6 +505,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 			nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res);
 	}
 out_nodeleg:
+	nfs_free_seqid(o_arg.seqid);
 	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 out:
 	dput(parent);
@@ -667,6 +683,9 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	/* Serialization for the sequence id */
 	down(&sp->so_sema);
 
+	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+	if (o_arg.seqid == NULL)
+		return -ENOMEM;
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_err;
@@ -681,6 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	update_open_stateid(state, &o_res.stateid, flags);
 	if (o_res.delegation_type != 0)
 		nfs_inode_set_delegation(inode, cred, &o_res);
+	nfs_free_seqid(o_arg.seqid);
 	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
@@ -690,6 +710,7 @@ out_err:
 	if (sp != NULL) {
 		if (state != NULL)
 			nfs4_put_open_state(state);
+		nfs_free_seqid(o_arg.seqid);
 		up(&sp->so_sema);
 		nfs4_put_state_owner(sp);
 	}
@@ -718,7 +739,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 		 * It is actually a sign of a bug on the client or on the server.
 		 *
 		 * If we receive a BAD_SEQID error in the particular case of
-		 * doing an OPEN, we assume that nfs4_increment_seqid() will
+		 * doing an OPEN, we assume that nfs_increment_open_seqid() will
 		 * have unhashed the old state_owner for us, and that we can
 		 * therefore safely retry using a new one. We should still warn
 		 * the user though...
@@ -799,7 +820,7 @@ static void nfs4_close_done(struct rpc_task *task)
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
-	nfs4_increment_seqid(task->tk_status, sp);
+	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			memcpy(&state->stateid, &calldata->res.stateid,
@@ -818,6 +839,7 @@ static void nfs4_close_done(struct rpc_task *task)
 	}
 	state->state = calldata->arg.open_flags;
 	nfs4_put_open_state(state);
+	nfs_free_seqid(calldata->arg.seqid);
 	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&server->nfs4_state->cl_sem);
@@ -865,7 +887,11 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
 	calldata->state = state;
 	calldata->arg.fh = NFS_FH(inode);
 	/* Serialization for the sequence id */
-	calldata->arg.seqid = state->owner->so_seqid;
+	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+	if (calldata->arg.seqid == NULL) {
+		kfree(calldata);
+		return -ENOMEM;
+	}
 	calldata->arg.open_flags = mode;
 	memcpy(&calldata->arg.stateid, &state->stateid,
 			sizeof(calldata->arg.stateid));
@@ -2729,15 +2755,19 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 	/* We might have lost the locks! */
 	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
 		goto out;
-	luargs.seqid = lsp->ls_seqid;
-	memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid));
+	luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	status = -ENOMEM;
+	if (luargs.seqid == NULL)
+		goto out;
+	memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data));
 	arg.u.locku = &luargs;
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs4_increment_lock_seqid(status, lsp);
+	nfs_increment_lock_seqid(status, luargs.seqid);
 
 	if (status == 0)
-		memcpy(&lsp->ls_stateid,  &res.u.stateid, 
-				sizeof(lsp->ls_stateid));
+		memcpy(lsp->ls_stateid.data, res.u.stateid.data, 
+				sizeof(lsp->ls_stateid.data));
+	nfs_free_seqid(luargs.seqid);
 out:
 	up(&state->lock_sema);
 	if (status == 0)
@@ -2783,9 +2813,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		.reclaim = reclaim,
 		.new_lock_owner = 0,
 	};
-	int status;
+	struct nfs_seqid *lock_seqid;
+	int status = -ENOMEM;
 
-	if (!(lsp->ls_flags & NFS_LOCK_INITIALIZED)) {
+	lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (lock_seqid == NULL)
+		return -ENOMEM;
+	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
 		struct nfs4_state_owner *owner = state->owner;
 		struct nfs_open_to_lock otl = {
 			.lock_owner = {
@@ -2793,39 +2827,40 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 			},
 		};
 
-		otl.lock_seqid = lsp->ls_seqid;
+		otl.lock_seqid = lock_seqid;
 		otl.lock_owner.id = lsp->ls_id;
 		memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
 		largs.u.open_lock = &otl;
 		largs.new_lock_owner = 1;
 		arg.u.lock = &largs;
 		down(&owner->so_sema);
-		otl.open_seqid = owner->so_seqid;
-		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment open_owner seqid on success, and 
-		* seqid mutating errors */
-		nfs4_increment_seqid(status, owner);
-		up(&owner->so_sema);
-		if (status == 0) {
-			lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-			lsp->ls_seqid++;
+		otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
+		if (otl.open_seqid != NULL) {
+			status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+			/* increment seqid on success, and seqid mutating errors */
+			nfs_increment_open_seqid(status, otl.open_seqid);
+			nfs_free_seqid(otl.open_seqid);
 		}
+		up(&owner->so_sema);
+		if (status == 0)
+			nfs_confirm_seqid(&lsp->ls_seqid, 0);
 	} else {
-		struct nfs_exist_lock el = {
-			.seqid = lsp->ls_seqid,
-		};
+		struct nfs_exist_lock el;
 		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
 		largs.u.exist_lock = &el;
 		arg.u.lock = &largs;
+		el.seqid = lock_seqid;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-		/* increment seqid on success, and * seqid mutating errors*/
-		nfs4_increment_lock_seqid(status, lsp);
 	}
+	/* increment seqid on success, and seqid mutating errors*/
+	nfs_increment_lock_seqid(status, lock_seqid);
 	/* save the returned stateid. */
-	if (status == 0)
-		memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid));
-	else if (status == -NFS4ERR_DENIED)
+	if (status == 0) {
+		memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data));
+		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+	} else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
+	nfs_free_seqid(lock_seqid);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index afe587d82f1e..f535c219cf3a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -264,13 +264,16 @@ nfs4_alloc_state_owner(void)
 {
 	struct nfs4_state_owner *sp;
 
-	sp = kmalloc(sizeof(*sp),GFP_KERNEL);
+	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
 	if (!sp)
 		return NULL;
 	init_MUTEX(&sp->so_sema);
-	sp->so_seqid = 0;                 /* arbitrary */
 	INIT_LIST_HEAD(&sp->so_states);
 	INIT_LIST_HEAD(&sp->so_delegations);
+	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
+	sp->so_seqid.sequence = &sp->so_sequence;
+	spin_lock_init(&sp->so_sequence.lock);
+	INIT_LIST_HEAD(&sp->so_sequence.list);
 	atomic_set(&sp->so_count, 1);
 	return sp;
 }
@@ -553,12 +556,10 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 	struct nfs4_lock_state *lsp;
 	struct nfs4_client *clp = state->owner->so_client;
 
-	lsp = kmalloc(sizeof(*lsp), GFP_KERNEL);
+	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
 	if (lsp == NULL)
 		return NULL;
-	lsp->ls_flags = 0;
-	lsp->ls_seqid = 0;	/* arbitrary */
-	memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data));
+	lsp->ls_seqid.sequence = &state->owner->so_sequence;
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	spin_lock(&clp->cl_lock);
@@ -673,29 +674,102 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
 	nfs4_put_lock_state(lsp);
 }
 
-/*
-* Called with state->lock_sema and clp->cl_sem held.
-*/
-void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp)
+struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
+{
+	struct rpc_sequence *sequence = counter->sequence;
+	struct nfs_seqid *new;
+
+	new = kmalloc(sizeof(*new), GFP_KERNEL);
+	if (new != NULL) {
+		new->sequence = counter;
+		new->task = NULL;
+		spin_lock(&sequence->lock);
+		list_add_tail(&new->list, &sequence->list);
+		spin_unlock(&sequence->lock);
+	}
+	return new;
+}
+
+void nfs_free_seqid(struct nfs_seqid *seqid)
 {
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		lsp->ls_seqid++;
+	struct rpc_sequence *sequence = seqid->sequence->sequence;
+	struct rpc_task *next = NULL;
+
+	spin_lock(&sequence->lock);
+	list_del(&seqid->list);
+	if (!list_empty(&sequence->list)) {
+		next = list_entry(sequence->list.next, struct nfs_seqid, list)->task;
+		if (next)
+			rpc_wake_up_task(next);
+	}
+	spin_unlock(&sequence->lock);
+	kfree(seqid);
 }
 
 /*
-* Called with sp->so_sema and clp->cl_sem held.
-*
-* Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
-* failed with a seqid incrementing error -
-* see comments nfs_fs.h:seqid_mutating_error()
-*/
-void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
-{
-	if (status == NFS_OK || seqid_mutating_err(-status))
-		sp->so_seqid++;
-	/* If the server returns BAD_SEQID, unhash state_owner here */
-	if (status == -NFS4ERR_BAD_SEQID)
+ * Called with sp->so_sema and clp->cl_sem held.
+ *
+ * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+{
+	switch (status) {
+		case 0:
+			break;
+		case -NFS4ERR_BAD_SEQID:
+		case -NFS4ERR_STALE_CLIENTID:
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_BAD_STATEID:
+		case -NFS4ERR_BADXDR:
+		case -NFS4ERR_RESOURCE:
+		case -NFS4ERR_NOFILEHANDLE:
+			/* Non-seqid mutating errors */
+			return;
+	};
+	/*
+	 * Note: no locking needed as we are guaranteed to be first
+	 * on the sequence list
+	 */
+	seqid->sequence->counter++;
+}
+
+void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
+{
+	if (status == -NFS4ERR_BAD_SEQID) {
+		struct nfs4_state_owner *sp = container_of(seqid->sequence,
+				struct nfs4_state_owner, so_seqid);
 		nfs4_drop_state_owner(sp);
+	}
+	return nfs_increment_seqid(status, seqid);
+}
+
+/*
+ * Called with ls->lock_sema and clp->cl_sem held.
+ *
+ * Increment the seqid if the LOCK/LOCKU succeeded, or
+ * failed with a seqid incrementing error -
+ * see comments nfs_fs.h:seqid_mutating_error()
+ */
+void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
+{
+	return nfs_increment_seqid(status, seqid);
+}
+
+int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
+{
+	struct rpc_sequence *sequence = seqid->sequence->sequence;
+	int status = 0;
+
+	spin_lock(&sequence->lock);
+	if (sequence->list.next != &seqid->list) {
+		seqid->task = task;
+		rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+		status = -EAGAIN;
+	}
+	spin_unlock(&sequence->lock);
+	return status;
 }
 
 static int reclaimer(void *);
@@ -791,8 +865,6 @@ static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct n
 		if (state->state == 0)
 			continue;
 		status = ops->recover_open(sp, state);
-		list_for_each_entry(lock, &state->lock_states, ls_locks)
-			lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
 		if (status >= 0) {
 			status = nfs4_reclaim_locks(ops, state);
 			if (status < 0)
@@ -831,6 +903,26 @@ out_err:
 	return status;
 }
 
+static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
+{
+	struct nfs4_state_owner *sp;
+	struct nfs4_state *state;
+	struct nfs4_lock_state *lock;
+
+	/* Reset all sequence ids to zero */
+	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+		sp->so_seqid.counter = 0;
+		sp->so_seqid.flags = 0;
+		list_for_each_entry(state, &sp->so_states, open_states) {
+			list_for_each_entry(lock, &state->lock_states, ls_locks) {
+				lock->ls_seqid.counter = 0;
+				lock->ls_seqid.flags = 0;
+				lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+			}
+		}
+	}
+}
+
 static int reclaimer(void *ptr)
 {
 	struct reclaimer_args *args = (struct reclaimer_args *)ptr;
@@ -864,6 +956,7 @@ restart_loop:
 		default:
 			ops = &nfs4_network_partition_recovery_ops;
 	};
+	nfs4_state_mark_reclaim(clp);
 	status = __nfs4_init_client(clp);
 	if (status)
 		goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6c564ef9489e..fcd28a29a2f8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -604,7 +604,7 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 
 	RESERVE_SPACE(8+sizeof(arg->stateid.data));
 	WRITE32(OP_CLOSE);
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
 	
 	return 0;
@@ -732,9 +732,9 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 		struct nfs_open_to_lock *ol = opargs->u.open_lock;
 
 		RESERVE_SPACE(40);
-		WRITE32(ol->open_seqid);
+		WRITE32(ol->open_seqid->sequence->counter);
 		WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
-		WRITE32(ol->lock_seqid);
+		WRITE32(ol->lock_seqid->sequence->counter);
 		WRITE64(ol->lock_owner.clientid);
 		WRITE32(4);
 		WRITE32(ol->lock_owner.id);
@@ -744,7 +744,7 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 
 		RESERVE_SPACE(20);
 		WRITEMEM(&el->stateid, sizeof(el->stateid));
-		WRITE32(el->seqid);
+		WRITE32(el->seqid->sequence->counter);
 	}
 
 	return 0;
@@ -775,7 +775,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	RESERVE_SPACE(44);
 	WRITE32(OP_LOCKU);
 	WRITE32(arg->type);
-	WRITE32(opargs->seqid);
+	WRITE32(opargs->seqid->sequence->counter);
 	WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
 	WRITE64(arg->offset);
 	WRITE64(arg->length);
@@ -826,7 +826,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
  */
 	RESERVE_SPACE(8);
 	WRITE32(OP_OPEN);
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	RESERVE_SPACE(16);
 	WRITE64(arg->clientid);
@@ -941,7 +941,7 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
 	RESERVE_SPACE(8+sizeof(arg->stateid.data));
 	WRITE32(OP_OPEN_CONFIRM);
 	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 
 	return 0;
 }
@@ -953,7 +953,7 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
 	RESERVE_SPACE(8+sizeof(arg->stateid.data));
 	WRITE32(OP_OPEN_DOWNGRADE);
 	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
-	WRITE32(arg->seqid);
+	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	return 0;
 }
@@ -1416,6 +1416,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
         };
         int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
         xdr_init_encode(&xdr, &req->rq_snd_buf, p);
         encode_compound_hdr(&xdr, &hdr);
         status = encode_putfh(&xdr, args->fh);
@@ -1437,6 +1440,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1464,6 +1470,9 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1485,6 +1494,9 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nf
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1506,6 +1518,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1525,8 +1540,17 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
+	struct nfs_lock_opargs *opargs = args->u.lock;
+	struct nfs_seqid *seqid;
 	int status;
 
+	if (opargs->new_lock_owner)
+		seqid = opargs->u.open_lock->lock_seqid;
+	else
+		seqid = opargs->u.exist_lock->seqid;
+	status = nfs_wait_on_sequence(seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1569,6 +1593,9 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock
 	};
 	int status;
 
+	status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task);
+	if (status != 0)
+		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a2bf6914ff1b..d578912bf9a9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -96,12 +96,13 @@ struct nfs4_change_info {
 	u64			after;
 };
 
+struct nfs_seqid;
 /*
  * Arguments to the open call.
  */
 struct nfs_openargs {
 	const struct nfs_fh *	fh;
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 	int			open_flags;
 	__u64                   clientid;
 	__u32                   id;
@@ -136,7 +137,7 @@ struct nfs_openres {
 struct nfs_open_confirmargs {
 	const struct nfs_fh *	fh;
 	nfs4_stateid            stateid;
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 };
 
 struct nfs_open_confirmres {
@@ -149,7 +150,7 @@ struct nfs_open_confirmres {
 struct nfs_closeargs {
 	struct nfs_fh *         fh;
 	nfs4_stateid            stateid;
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 	int			open_flags;
 };
 
@@ -165,15 +166,15 @@ struct nfs_lowner {
 };
 
 struct nfs_open_to_lock {
-	__u32                   open_seqid;
+	struct nfs_seqid *	open_seqid;
 	nfs4_stateid            open_stateid;
-	__u32                   lock_seqid;
+	struct nfs_seqid *	lock_seqid;
 	struct nfs_lowner       lock_owner;
 };
 
 struct nfs_exist_lock {
 	nfs4_stateid            stateid;
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 };
 
 struct nfs_lock_opargs {
@@ -186,7 +187,7 @@ struct nfs_lock_opargs {
 };
 
 struct nfs_locku_opargs {
-	__u32                   seqid;
+	struct nfs_seqid *	seqid;
 	nfs4_stateid            stateid;
 };
 
-- 
cgit v1.2.3-58-ga151


From 9512135df14f8293b9bc5e8fb22d4279dee5ff66 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:12 -0700
Subject: NFSv4: Fix a potential CLOSE race

 Once the state_owner and lock_owner semaphores get removed, it will be
 possible for other OPEN requests to reopen the same file if they have
 lower sequence ids than our CLOSE call.
 This patch ensures that we recheck the file state once
 nfs_wait_on_sequence() has completed waiting.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 110 +++++++++++++++++++++++++++++++++---------------
 fs/nfs/nfs4state.c      |   6 ++-
 fs/nfs/nfs4xdr.c        |  14 ++----
 include/linux/nfs_xdr.h |   2 +-
 4 files changed, 87 insertions(+), 45 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9ba89e7cdd28..5154ddf6d9a5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -189,6 +189,21 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 		nfsi->change_attr = cinfo->after;
 }
 
+/* Helper for asynchronous RPC calls */
+static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
+		rpc_action tk_exit, void *calldata)
+{
+	struct rpc_task *task;
+
+	if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC)))
+		return -ENOMEM;
+
+	task->tk_calldata = calldata;
+	task->tk_action = tk_begin;
+	rpc_execute(task);
+	return 0;
+}
+
 static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
 {
 	struct inode *inode = state->inode;
@@ -810,11 +825,24 @@ struct nfs4_closedata {
 	struct nfs_closeres res;
 };
 
+static void nfs4_free_closedata(struct nfs4_closedata *calldata)
+{
+	struct nfs4_state *state = calldata->state;
+	struct nfs4_state_owner *sp = state->owner;
+	struct nfs_server *server = NFS_SERVER(calldata->inode);
+
+	nfs4_put_open_state(calldata->state);
+	nfs_free_seqid(calldata->arg.seqid);
+	up(&sp->so_sema);
+	nfs4_put_state_owner(sp);
+	up_read(&server->nfs4_state->cl_sem);
+	kfree(calldata);
+}
+
 static void nfs4_close_done(struct rpc_task *task)
 {
 	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
 	struct nfs4_state *state = calldata->state;
-	struct nfs4_state_owner *sp = state->owner;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
         /* hmm. we are done with the inode, and in the process of freeing
@@ -838,25 +866,46 @@ static void nfs4_close_done(struct rpc_task *task)
 			}
 	}
 	state->state = calldata->arg.open_flags;
-	nfs4_put_open_state(state);
-	nfs_free_seqid(calldata->arg.seqid);
-	up(&sp->so_sema);
-	nfs4_put_state_owner(sp);
-	up_read(&server->nfs4_state->cl_sem);
-	kfree(calldata);
+	nfs4_free_closedata(calldata);
 }
 
-static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
+static void nfs4_close_begin(struct rpc_task *task)
 {
+	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_state *state = calldata->state;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
 		.rpc_argp = &calldata->arg,
 		.rpc_resp = &calldata->res,
-		.rpc_cred = calldata->state->owner->so_cred,
+		.rpc_cred = state->owner->so_cred,
 	};
-	if (calldata->arg.open_flags != 0)
+	int mode = 0;
+	int status;
+
+	status = nfs_wait_on_sequence(calldata->arg.seqid, task);
+	if (status != 0)
+		return;
+	/* Don't reorder reads */
+	smp_rmb();
+	/* Recalculate the new open mode in case someone reopened the file
+	 * while we were waiting in line to be scheduled.
+	 */
+	if (state->nreaders != 0)
+		mode |= FMODE_READ;
+	if (state->nwriters != 0)
+		mode |= FMODE_WRITE;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
+		state->state = mode;
+	if (mode == state->state) {
+		nfs4_free_closedata(calldata);
+		task->tk_exit = NULL;
+		rpc_exit(task, 0);
+		return;
+	}
+	if (mode != 0)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-	return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
+	calldata->arg.open_flags = mode;
+	rpc_call_setup(task, &msg, 0);
 }
 
 /* 
@@ -873,35 +922,30 @@ static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *
 int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
 {
 	struct nfs4_closedata *calldata;
-	int status;
+	int status = -ENOMEM;
 
-	/* Tell caller we're done */
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		state->state = mode;
-		return 0;
-	}
-	calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
-		return -ENOMEM;
+		goto out;
 	calldata->inode = inode;
 	calldata->state = state;
 	calldata->arg.fh = NFS_FH(inode);
+	calldata->arg.stateid = &state->stateid;
 	/* Serialization for the sequence id */
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
-	if (calldata->arg.seqid == NULL) {
-		kfree(calldata);
-		return -ENOMEM;
-	}
-	calldata->arg.open_flags = mode;
-	memcpy(&calldata->arg.stateid, &state->stateid,
-			sizeof(calldata->arg.stateid));
-	status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
-	/*
-	 * Return -EINPROGRESS on success in order to indicate to the
-	 * caller that an asynchronous RPC call has been launched, and
-	 * that it will release the semaphores on completion.
-	 */
-	return (status == 0) ? -EINPROGRESS : status;
+	if (calldata->arg.seqid == NULL)
+		goto out_free_calldata;
+
+	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin,
+			nfs4_close_done, calldata);
+	if (status == 0)
+		goto out;
+
+	nfs_free_seqid(calldata->arg.seqid);
+out_free_calldata:
+	kfree(calldata);
+out:
+	return status;
 }
 
 struct inode *
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f535c219cf3a..59c93f37e1b2 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -518,7 +518,11 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 			newstate |= FMODE_WRITE;
 		if (state->state == newstate)
 			goto out;
-		if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+			state->state = newstate;
+			goto out;
+		}
+		if (nfs4_do_close(inode, state, newstate) == 0)
 			return;
 	}
 out:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index fcd28a29a2f8..934ec50ea6bf 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -602,10 +602,10 @@ static int encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg)
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_CLOSE);
 	WRITE32(arg->seqid->sequence->counter);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
 	
 	return 0;
 }
@@ -950,9 +950,9 @@ static int encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closea
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_DOWNGRADE);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
 	WRITE32(arg->seqid->sequence->counter);
 	encode_share_access(xdr, arg->open_flags);
 	return 0;
@@ -1416,9 +1416,6 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
         };
         int status;
 
-	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
         xdr_init_encode(&xdr, &req->rq_snd_buf, p);
         encode_compound_hdr(&xdr, &hdr);
         status = encode_putfh(&xdr, args->fh);
@@ -1518,9 +1515,6 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
 	};
 	int status;
 
-	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d578912bf9a9..cac0df950c66 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -149,7 +149,7 @@ struct nfs_open_confirmres {
  */
 struct nfs_closeargs {
 	struct nfs_fh *         fh;
-	nfs4_stateid            stateid;
+	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
 };
-- 
cgit v1.2.3-58-ga151


From e6dfa553cffcb9740f932311dff42f81d6ac63bb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:13 -0700
Subject: NFSv4: Remove obsolete state_owner and lock_owner semaphores

 OPEN, CLOSE, etc no longer need these semaphores to ensure ordering of
 requests.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h   |  6 ------
 fs/nfs/nfs4proc.c  | 18 ------------------
 fs/nfs/nfs4state.c | 15 ++++-----------
 3 files changed, 4 insertions(+), 35 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 6ac6708484f5..d4fcb5d0ce6c 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -125,16 +125,11 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
- *
- * The ->so_sema is held during all state_owner seqid-mutating operations:
- * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize
- * so_seqid.
  */
 struct nfs4_state_owner {
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
 	struct nfs4_client   *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
-	struct semaphore     so_sema;
 	atomic_t	     so_count;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
@@ -183,7 +178,6 @@ struct nfs4_state {
 	struct inode *inode;		/* Pointer to the inode */
 
 	unsigned long flags;		/* Do we hold any locks? */
-	struct semaphore lock_sema;	/* Serializes file locking operations */
 	spinlock_t state_lock;		/* Protects the lock_states list */
 
 	nfs4_stateid stateid;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5154ddf6d9a5..25bab6032dfe 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -224,7 +224,6 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
 /*
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
- * 	Assumes caller is holding the sp->so_sem
  */
 static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
 {
@@ -322,7 +321,6 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 	};
 	int status = 0;
 
-	down(&sp->so_sema);
 	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 		goto out;
 	if (state->state == 0)
@@ -342,7 +340,6 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 	}
 	nfs_free_seqid(arg.seqid);
 out:
-	up(&sp->so_sema);
 	dput(parent);
 	return status;
 }
@@ -595,7 +592,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
 		goto out_err;
 	}
-	down(&sp->so_sema);
 	state = nfs4_get_open_state(inode, sp);
 	if (state == NULL)
 		goto out_err;
@@ -620,7 +616,6 @@ static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred
 	set_bit(NFS_DELEGATED_STATE, &state->flags);
 	update_open_stateid(state, &delegation->stateid, open_flags);
 out_ok:
-	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
@@ -631,7 +626,6 @@ out_err:
 	if (sp != NULL) {
 		if (state != NULL)
 			nfs4_put_open_state(state);
-		up(&sp->so_sema);
 		nfs4_put_state_owner(sp);
 	}
 	up_read(&nfsi->rwsem);
@@ -696,7 +690,6 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	} else
 		o_arg.u.attrs = sattr;
 	/* Serialization for the sequence id */
-	down(&sp->so_sema);
 
 	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
 	if (o_arg.seqid == NULL)
@@ -716,7 +709,6 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	if (o_res.delegation_type != 0)
 		nfs_inode_set_delegation(inode, cred, &o_res);
 	nfs_free_seqid(o_arg.seqid);
-	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
@@ -726,7 +718,6 @@ out_err:
 		if (state != NULL)
 			nfs4_put_open_state(state);
 		nfs_free_seqid(o_arg.seqid);
-		up(&sp->so_sema);
 		nfs4_put_state_owner(sp);
 	}
 	/* Note: clp->cl_sem must be released before nfs4_put_open_state()! */
@@ -833,7 +824,6 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata)
 
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
-	up(&sp->so_sema);
 	nfs4_put_state_owner(sp);
 	up_read(&server->nfs4_state->cl_sem);
 	kfree(calldata);
@@ -2702,7 +2692,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 
 	down_read(&clp->cl_sem);
 	nlo.clientid = clp->cl_clientid;
-	down(&state->lock_sema);
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
@@ -2729,7 +2718,6 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		status = 0;
 	}
 out:
-	up(&state->lock_sema);
 	up_read(&clp->cl_sem);
 	return status;
 }
@@ -2791,7 +2779,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 	int status;
 			
 	down_read(&clp->cl_sem);
-	down(&state->lock_sema);
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
@@ -2813,7 +2800,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 				sizeof(lsp->ls_stateid.data));
 	nfs_free_seqid(luargs.seqid);
 out:
-	up(&state->lock_sema);
 	if (status == 0)
 		do_vfs_lock(request->fl_file, request);
 	up_read(&clp->cl_sem);
@@ -2877,7 +2863,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		largs.u.open_lock = &otl;
 		largs.new_lock_owner = 1;
 		arg.u.lock = &largs;
-		down(&owner->so_sema);
 		otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
 		if (otl.open_seqid != NULL) {
 			status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
@@ -2885,7 +2870,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 			nfs_increment_open_seqid(status, otl.open_seqid);
 			nfs_free_seqid(otl.open_seqid);
 		}
-		up(&owner->so_sema);
 		if (status == 0)
 			nfs_confirm_seqid(&lsp->ls_seqid, 0);
 	} else {
@@ -2944,11 +2928,9 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
 	int status;
 
 	down_read(&clp->cl_sem);
-	down(&state->lock_sema);
 	status = nfs4_set_lock_state(state, request);
 	if (status == 0)
 		status = _nfs4_do_setlk(state, cmd, request, 0);
-	up(&state->lock_sema);
 	if (status == 0) {
 		/* Note: we always want to sleep here! */
 		request->fl_flags |= FL_SLEEP;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 59c93f37e1b2..86c08c165ce7 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -267,7 +267,6 @@ nfs4_alloc_state_owner(void)
 	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
 	if (!sp)
 		return NULL;
-	init_MUTEX(&sp->so_sema);
 	INIT_LIST_HEAD(&sp->so_states);
 	INIT_LIST_HEAD(&sp->so_delegations);
 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
@@ -362,7 +361,6 @@ nfs4_alloc_open_state(void)
 	memset(state->stateid.data, 0, sizeof(state->stateid.data));
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
-	init_MUTEX(&state->lock_sema);
 	spin_lock_init(&state->state_lock);
 	return state;
 }
@@ -444,7 +442,6 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
 	state = __nfs4_find_state_byowner(inode, owner);
 	if (state == NULL && new != NULL) {
 		state = new;
-		/* Caller *must* be holding owner->so_sem */
 		/* Note: The reclaim code dictates that we add stateless
 		 * and read-only stateids to the end of the list */
 		list_add_tail(&state->open_states, &owner->so_states);
@@ -464,7 +461,7 @@ out:
 
 /*
  * Beware! Caller must be holding exactly one
- * reference to clp->cl_sem and owner->so_sema!
+ * reference to clp->cl_sem!
  */
 void nfs4_put_open_state(struct nfs4_state *state)
 {
@@ -485,7 +482,6 @@ void nfs4_put_open_state(struct nfs4_state *state)
 
 /*
  * Beware! Caller must be holding no references to clp->cl_sem!
- * of owner->so_sema!
  */
 void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 {
@@ -496,7 +492,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 
 	atomic_inc(&owner->so_count);
 	down_read(&clp->cl_sem);
-	down(&owner->so_sema);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&inode->i_lock);
 	if (mode & FMODE_READ)
@@ -527,7 +522,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 	}
 out:
 	nfs4_put_open_state(state);
-	up(&owner->so_sema);
 	nfs4_put_state_owner(owner);
 	up_read(&clp->cl_sem);
 }
@@ -553,7 +547,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding state->lock_sema
  */
 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
 {
@@ -577,7 +570,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
  *
- * The caller must be holding state->lock_sema and clp->cl_sem
+ * The caller must be holding clp->cl_sem
  */
 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
 {
@@ -711,7 +704,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
 }
 
 /*
- * Called with sp->so_sema and clp->cl_sem held.
+ * Called with clp->cl_sem held.
  *
  * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
  * failed with a seqid incrementing error -
@@ -750,7 +743,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 }
 
 /*
- * Called with ls->lock_sema and clp->cl_sem held.
+ * Called with clp->cl_sem held.
  *
  * Increment the seqid if the LOCK/LOCKU succeeded, or
  * failed with a seqid incrementing error -
-- 
cgit v1.2.3-58-ga151


From 83c9d41e456033000ccfadf535f3098d8739488d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:13 -0700
Subject: NFSv4: Remove nfs4_client->cl_sem from close() path

 We no longer need to worry about collisions between close() and the state
 recovery code, since the new close will automatically recheck the
 file state once it is done waiting on its sequence slot.

 Ditto for the nfs4_proc_locku() procedure.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c  | 5 -----
 fs/nfs/nfs4state.c | 9 +--------
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 25bab6032dfe..611052bacaff 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -820,12 +820,10 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata)
 {
 	struct nfs4_state *state = calldata->state;
 	struct nfs4_state_owner *sp = state->owner;
-	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
-	up_read(&server->nfs4_state->cl_sem);
 	kfree(calldata);
 }
 
@@ -2758,7 +2756,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs4_client *clp = server->nfs4_state;
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
@@ -2778,7 +2775,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 	struct nfs_locku_opargs luargs;
 	int status;
 			
-	down_read(&clp->cl_sem);
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
@@ -2802,7 +2798,6 @@ static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock
 out:
 	if (status == 0)
 		do_vfs_lock(request->fl_file, request);
-	up_read(&clp->cl_sem);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 86c08c165ce7..bb3574361958 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -481,17 +481,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
 }
 
 /*
- * Beware! Caller must be holding no references to clp->cl_sem!
+ * Close the current file.
  */
 void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 {
 	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	struct nfs4_client *clp = owner->so_client;
 	int newstate;
 
 	atomic_inc(&owner->so_count);
-	down_read(&clp->cl_sem);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&inode->i_lock);
 	if (mode & FMODE_READ)
@@ -523,7 +521,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 out:
 	nfs4_put_open_state(state);
 	nfs4_put_state_owner(owner);
-	up_read(&clp->cl_sem);
 }
 
 /*
@@ -704,8 +701,6 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
 }
 
 /*
- * Called with clp->cl_sem held.
- *
  * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
  * failed with a seqid incrementing error -
  * see comments nfs_fs.h:seqid_mutating_error()
@@ -743,8 +738,6 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 }
 
 /*
- * Called with clp->cl_sem held.
- *
  * Increment the seqid if the LOCK/LOCKU succeeded, or
  * failed with a seqid incrementing error -
  * see comments nfs_fs.h:seqid_mutating_error()
-- 
cgit v1.2.3-58-ga151


From 0a8838f972883112f0a7b259141b24db17583c2d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:14 -0700
Subject: NFSv4: Add missing handling of OPEN_CONFIRM requests on
 CLAIM_DELEGATE_CUR.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 611052bacaff..f57dba815099 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -56,6 +56,7 @@
 #define NFS4_POLL_RETRY_MIN	(1*HZ)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
+static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
@@ -333,11 +334,21 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 	memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data));
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
 	nfs_increment_open_seqid(status, arg.seqid);
+	if (status != 0)
+		goto out_free;
+	if(res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
+		status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode),
+				sp, &res.stateid, arg.seqid);
+		if (status != 0)
+			goto out_free;
+	}
+	nfs_confirm_seqid(&sp->so_seqid, 0);
 	if (status >= 0) {
 		memcpy(state->stateid.data, res.stateid.data,
 				sizeof(state->stateid.data));
 		clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
+out_free:
 	nfs_free_seqid(arg.seqid);
 out:
 	dput(parent);
@@ -366,7 +377,7 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 	return err;
 }
 
-static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
+static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
 {
 	struct nfs_open_confirmargs arg = {
 		.fh             = fh,
-- 
cgit v1.2.3-58-ga151


From faf5f49c2d9c0af2847837c232a432cc146e203b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:15 -0700
Subject: NFSv4: Make NFS clean up byte range locks asynchronously

 Currently we fail to do so if the process was signalled.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h        |   1 +
 fs/nfs/nfs4proc.c       | 161 +++++++++++++++++++++++++++++++++---------------
 fs/nfs/nfs4state.c      |   2 +-
 fs/nfs/nfs4xdr.c        |   9 +--
 include/linux/nfs_xdr.h |   2 +-
 5 files changed, 116 insertions(+), 59 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d4fcb5d0ce6c..2215cdee43ae 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -249,6 +249,7 @@ extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
+extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f57dba815099..612a9a14aed3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -58,9 +58,9 @@
 
 static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
-static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
+static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
-static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
+static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
 extern struct rpc_procinfo nfs4_procedures[];
 
@@ -2422,7 +2422,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 }
 
 static int
-nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
+nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 
@@ -2500,7 +2500,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
 /* This is the error handling routine for processes that are allowed
  * to sleep.
  */
-int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
+int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
 {
 	struct nfs4_client *clp = server->nfs4_state;
 	int ret = errorcode;
@@ -2763,68 +2763,127 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 	return res;
 }
 
-static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+struct nfs4_unlockdata {
+	struct nfs_lockargs arg;
+	struct nfs_locku_opargs luargs;
+	struct nfs_lockres res;
+	struct nfs4_lock_state *lsp;
+	struct nfs_open_context *ctx;
+	atomic_t refcount;
+	struct completion completion;
+};
+
+static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata)
 {
-	struct inode *inode = state->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_lockargs arg = {
-		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
-	};
-	struct nfs_lockres res = {
-		.server = server,
-	};
+	if (atomic_dec_and_test(&calldata->refcount)) {
+		nfs_free_seqid(calldata->luargs.seqid);
+		nfs4_put_lock_state(calldata->lsp);
+		put_nfs_open_context(calldata->ctx);
+		kfree(calldata);
+	}
+}
+
+static void nfs4_locku_complete(struct nfs4_unlockdata *calldata)
+{
+	complete(&calldata->completion);
+	nfs4_locku_release_calldata(calldata);
+}
+
+static void nfs4_locku_done(struct rpc_task *task)
+{
+	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+
+	nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
+	switch (task->tk_status) {
+		case 0:
+			memcpy(calldata->lsp->ls_stateid.data,
+					calldata->res.u.stateid.data,
+					sizeof(calldata->lsp->ls_stateid.data));
+			break;
+		case -NFS4ERR_STALE_STATEID:
+		case -NFS4ERR_EXPIRED:
+			nfs4_schedule_state_recovery(calldata->res.server->nfs4_state);
+			break;
+		default:
+			if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
+				rpc_restart_call(task);
+				return;
+			}
+	}
+	nfs4_locku_complete(calldata);
+}
+
+static void nfs4_locku_begin(struct rpc_task *task)
+{
+	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= state->owner->so_cred,
+		.rpc_argp       = &calldata->arg,
+		.rpc_resp       = &calldata->res,
+		.rpc_cred	= calldata->lsp->ls_state->owner->so_cred,
 	};
+	int status;
+
+	status = nfs_wait_on_sequence(calldata->luargs.seqid, task);
+	if (status != 0)
+		return;
+	if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
+		nfs4_locku_complete(calldata);
+		task->tk_exit = NULL;
+		rpc_exit(task, 0);
+		return;
+	}
+	rpc_call_setup(task, &msg, 0);
+}
+
+static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
+{
+	struct nfs4_unlockdata *calldata;
+	struct inode *inode = state->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp;
-	struct nfs_locku_opargs luargs;
 	int status;
-			
+
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
-		goto out;
+		return status;
 	lsp = request->fl_u.nfs4_fl.owner;
 	/* We might have lost the locks! */
 	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
-		goto out;
-	luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
-	status = -ENOMEM;
-	if (luargs.seqid == NULL)
-		goto out;
-	memcpy(luargs.stateid.data, lsp->ls_stateid.data, sizeof(luargs.stateid.data));
-	arg.u.locku = &luargs;
-	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	nfs_increment_lock_seqid(status, luargs.seqid);
-
-	if (status == 0)
-		memcpy(lsp->ls_stateid.data, res.u.stateid.data, 
-				sizeof(lsp->ls_stateid.data));
-	nfs_free_seqid(luargs.seqid);
-out:
+		return 0;
+	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+	if (calldata == NULL)
+		return -ENOMEM;
+	calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (calldata->luargs.seqid == NULL) {
+		kfree(calldata);
+		return -ENOMEM;
+	}
+	calldata->luargs.stateid = &lsp->ls_stateid;
+	calldata->arg.fh = NFS_FH(inode);
+	calldata->arg.type = nfs4_lck_type(cmd, request);
+	calldata->arg.offset = request->fl_start;
+	calldata->arg.length = nfs4_lck_length(request);
+	calldata->arg.u.locku = &calldata->luargs;
+	calldata->res.server = server;
+	calldata->lsp = lsp;
+	atomic_inc(&lsp->ls_count);
+
+	/* Ensure we don't close file until we're done freeing locks! */
+	calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data);
+
+	atomic_set(&calldata->refcount, 2);
+	init_completion(&calldata->completion);
+
+	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
+			nfs4_locku_done, calldata);
 	if (status == 0)
-		do_vfs_lock(request->fl_file, request);
+		wait_for_completion_interruptible(&calldata->completion);
+	do_vfs_lock(request->fl_file, request);
+	nfs4_locku_release_calldata(calldata);
 	return status;
 }
 
-static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
-{
-	struct nfs4_exception exception = { };
-	int err;
-
-	do {
-		err = nfs4_handle_exception(NFS_SERVER(state->inode),
-				_nfs4_proc_unlck(state, cmd, request),
-				&exception);
-	} while (exception.retry);
-	return err;
-}
-
 static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *request, int reclaim)
 {
 	struct inode *inode = state->inode;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index bb3574361958..23834c8fb740 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -600,7 +600,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
  * Release reference to lock_state, and free it if we see that
  * it is no longer in use
  */
-static void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
+void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 {
 	struct nfs4_state *state;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 934ec50ea6bf..4706192cfb07 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -776,7 +776,7 @@ static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	WRITE32(OP_LOCKU);
 	WRITE32(arg->type);
 	WRITE32(opargs->seqid->sequence->counter);
-	WRITEMEM(&opargs->stateid, sizeof(opargs->stateid));
+	WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data));
 	WRITE64(arg->offset);
 	WRITE64(arg->length);
 
@@ -1587,9 +1587,6 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lock
 	};
 	int status;
 
-	status = nfs_wait_on_sequence(args->u.locku->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -2934,8 +2931,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
 	if (status == 0) {
-		READ_BUF(sizeof(nfs4_stateid));
-		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+		READ_BUF(sizeof(res->u.stateid.data));
+		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
 	}
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index cac0df950c66..849f95c5fae4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -188,7 +188,7 @@ struct nfs_lock_opargs {
 
 struct nfs_locku_opargs {
 	struct nfs_seqid *	seqid;
-	nfs4_stateid            stateid;
+	nfs4_stateid *		stateid;
 };
 
 struct nfs_lockargs {
-- 
cgit v1.2.3-58-ga151


From 06735b3454824bd561decbde46111f144e905923 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:15 -0700
Subject: NFSv4: Fix up handling of open_to_lock sequence ids

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 69 +++++++++++++++++++++++--------------------------
 fs/nfs/nfs4xdr.c        | 32 ++++++++++-------------
 include/linux/nfs_xdr.h | 19 +++-----------
 3 files changed, 49 insertions(+), 71 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 612a9a14aed3..35da15342e05 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2889,11 +2889,23 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
+	struct nfs_lock_opargs largs = {
+		.lock_stateid = &lsp->ls_stateid,
+		.open_stateid = &state->stateid,
+		.lock_owner = {
+			.clientid = server->nfs4_state->cl_clientid,
+			.id = lsp->ls_id,
+		},
+		.reclaim = reclaim,
+	};
 	struct nfs_lockargs arg = {
 		.fh = NFS_FH(inode),
 		.type = nfs4_lck_type(cmd, request),
 		.offset = request->fl_start,
 		.length = nfs4_lck_length(request),
+		.u = {
+			.lock = &largs,
+		},
 	};
 	struct nfs_lockres res = {
 		.server = server,
@@ -2904,56 +2916,39 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 		.rpc_resp       = &res,
 		.rpc_cred	= state->owner->so_cred,
 	};
-	struct nfs_lock_opargs largs = {
-		.reclaim = reclaim,
-		.new_lock_owner = 0,
-	};
-	struct nfs_seqid *lock_seqid;
 	int status = -ENOMEM;
 
-	lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
-	if (lock_seqid == NULL)
+	largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (largs.lock_seqid == NULL)
 		return -ENOMEM;
 	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
 		struct nfs4_state_owner *owner = state->owner;
-		struct nfs_open_to_lock otl = {
-			.lock_owner = {
-				.clientid = server->nfs4_state->cl_clientid,
-			},
-		};
-
-		otl.lock_seqid = lock_seqid;
-		otl.lock_owner.id = lsp->ls_id;
-		memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid));
-		largs.u.open_lock = &otl;
+
+		largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
+		if (largs.open_seqid == NULL)
+			goto out;
 		largs.new_lock_owner = 1;
-		arg.u.lock = &largs;
-		otl.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
-		if (otl.open_seqid != NULL) {
-			status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-			/* increment seqid on success, and seqid mutating errors */
-			nfs_increment_open_seqid(status, otl.open_seqid);
-			nfs_free_seqid(otl.open_seqid);
+		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+		/* increment open seqid on success, and seqid mutating errors */
+		if (largs.new_lock_owner != 0) {
+			nfs_increment_open_seqid(status, largs.open_seqid);
+			if (status == 0)
+				nfs_confirm_seqid(&lsp->ls_seqid, 0);
 		}
-		if (status == 0)
-			nfs_confirm_seqid(&lsp->ls_seqid, 0);
-	} else {
-		struct nfs_exist_lock el;
-		memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid));
-		largs.u.exist_lock = &el;
-		arg.u.lock = &largs;
-		el.seqid = lock_seqid;
+		nfs_free_seqid(largs.open_seqid);
+	} else
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
-	}
-	/* increment seqid on success, and seqid mutating errors*/
-	nfs_increment_lock_seqid(status, lock_seqid);
+	/* increment lock seqid on success, and seqid mutating errors*/
+	nfs_increment_lock_seqid(status, largs.lock_seqid);
 	/* save the returned stateid. */
 	if (status == 0) {
-		memcpy(lsp->ls_stateid.data, res.u.stateid.data, sizeof(lsp->ls_stateid.data));
+		memcpy(lsp->ls_stateid.data, res.u.stateid.data,
+				sizeof(lsp->ls_stateid.data));
 		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
 	} else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
-	nfs_free_seqid(lock_seqid);
+out:
+	nfs_free_seqid(largs.lock_seqid);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4706192cfb07..c5c75235c5b8 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -729,22 +729,18 @@ static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
 	WRITE64(arg->length);
 	WRITE32(opargs->new_lock_owner);
 	if (opargs->new_lock_owner){
-		struct nfs_open_to_lock *ol = opargs->u.open_lock;
-
 		RESERVE_SPACE(40);
-		WRITE32(ol->open_seqid->sequence->counter);
-		WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid));
-		WRITE32(ol->lock_seqid->sequence->counter);
-		WRITE64(ol->lock_owner.clientid);
+		WRITE32(opargs->open_seqid->sequence->counter);
+		WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data));
+		WRITE32(opargs->lock_seqid->sequence->counter);
+		WRITE64(opargs->lock_owner.clientid);
 		WRITE32(4);
-		WRITE32(ol->lock_owner.id);
+		WRITE32(opargs->lock_owner.id);
 	}
 	else {
-		struct nfs_exist_lock *el = opargs->u.exist_lock;
-
 		RESERVE_SPACE(20);
-		WRITEMEM(&el->stateid, sizeof(el->stateid));
-		WRITE32(el->seqid->sequence->counter);
+		WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data));
+		WRITE32(opargs->lock_seqid->sequence->counter);
 	}
 
 	return 0;
@@ -1535,16 +1531,14 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_locka
 		.nops   = 2,
 	};
 	struct nfs_lock_opargs *opargs = args->u.lock;
-	struct nfs_seqid *seqid;
 	int status;
 
-	if (opargs->new_lock_owner)
-		seqid = opargs->u.open_lock->lock_seqid;
-	else
-		seqid = opargs->u.exist_lock->seqid;
-	status = nfs_wait_on_sequence(seqid, req->rq_task);
+	status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task);
 	if (status != 0)
 		goto out;
+	/* Do we need to do an open_to_lock_owner? */
+	if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+		opargs->new_lock_owner = 0;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -2908,8 +2902,8 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
 
 	status = decode_op_hdr(xdr, OP_LOCK);
 	if (status == 0) {
-		READ_BUF(sizeof(nfs4_stateid));
-		COPYMEM(&res->u.stateid, sizeof(res->u.stateid));
+		READ_BUF(sizeof(res->u.stateid.data));
+		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
 	} else if (status == -NFS4ERR_DENIED)
 		return decode_lock_denied(xdr, &res->u.denied);
 	return status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 849f95c5fae4..57efcc27f20b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -165,25 +165,14 @@ struct nfs_lowner {
 	u32                     id;
 };
 
-struct nfs_open_to_lock {
-	struct nfs_seqid *	open_seqid;
-	nfs4_stateid            open_stateid;
+struct nfs_lock_opargs {
 	struct nfs_seqid *	lock_seqid;
+	nfs4_stateid *		lock_stateid;
+	struct nfs_seqid *	open_seqid;
+	nfs4_stateid *		open_stateid;
 	struct nfs_lowner       lock_owner;
-};
-
-struct nfs_exist_lock {
-	nfs4_stateid            stateid;
-	struct nfs_seqid *	seqid;
-};
-
-struct nfs_lock_opargs {
 	__u32                   reclaim;
 	__u32                   new_lock_owner;
-	union {
-		struct nfs_open_to_lock *open_lock;
-		struct nfs_exist_lock   *exist_lock;
-	} u;
 };
 
 struct nfs_locku_opargs {
-- 
cgit v1.2.3-58-ga151


From 039c4d7a82d8268ec71f59679460b41d0dd9b225 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:16 -0700
Subject:  NFS: Fix up a race in the NFS implementation of GETLK

 ...and fix a memory corruption bug due to improper use of memcpy() on
 a struct file_lock.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6bdcfa95de94..572d8593486f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -376,22 +376,31 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
+	struct file_lock *cfl;
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
 
 	lock_kernel();
-	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
-		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
-	else {
-		struct file_lock *cfl = posix_test_lock(filp, fl);
-
-		fl->fl_type = F_UNLCK;
-		if (cfl != NULL)
-			memcpy(fl, cfl, sizeof(*fl));
+	/* Try local locking first */
+	cfl = posix_test_lock(filp, fl);
+	if (cfl != NULL) {
+		locks_copy_lock(fl, cfl);
+		goto out;
 	}
+
+	if (nfs_have_delegation(inode, FMODE_READ))
+		goto out_noconflict;
+
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+		goto out_noconflict;
+
+	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
+out:
 	unlock_kernel();
 	return status;
+out_noconflict:
+	fl->fl_type = F_UNLCK;
+	goto out;
 }
 
 static int do_vfs_lock(struct file *file, struct file_lock *fl)
-- 
cgit v1.2.3-58-ga151


From 834f2a4a1554dc5b2598038b3fe8703defcbe467 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:16 -0700
Subject: VFS: Allow the filesystem to return a full file pointer on open
 intent

 This is needed by NFSv4 for atomicity reasons: our open command is in
 fact a lookup+open, so we need to be able to propagate open context
 information from lookup() into the resulting struct file's
 private_data field.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/exec.c             | 12 +++----
 fs/namei.c            | 93 +++++++++++++++++++++++++++++++++++++++++++++++----
 fs/open.c             | 79 +++++++++++++++++++++++++++++++++++--------
 include/linux/namei.h |  8 +++++
 4 files changed, 165 insertions(+), 27 deletions(-)

(limited to 'fs')

diff --git a/fs/exec.c b/fs/exec.c
index a04a575ad433..d2208f7c87db 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -126,8 +126,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	struct nameidata nd;
 	int error;
 
-	nd.intent.open.flags = FMODE_READ;
-	error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	if (error)
 		goto out;
 
@@ -139,7 +138,7 @@ asmlinkage long sys_uselib(const char __user * library)
 	if (error)
 		goto exit;
 
-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+	file = nameidata_to_filp(&nd, O_RDONLY);
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out;
@@ -167,6 +166,7 @@ asmlinkage long sys_uselib(const char __user * library)
 out:
   	return error;
 exit:
+	release_open_intent(&nd);
 	path_release(&nd);
 	goto out;
 }
@@ -490,8 +490,7 @@ struct file *open_exec(const char *name)
 	int err;
 	struct file *file;
 
-	nd.intent.open.flags = FMODE_READ;
-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+	err = path_lookup_open(name, LOOKUP_FOLLOW, &nd, FMODE_READ);
 	file = ERR_PTR(err);
 
 	if (!err) {
@@ -504,7 +503,7 @@ struct file *open_exec(const char *name)
 				err = -EACCES;
 			file = ERR_PTR(err);
 			if (!err) {
-				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
+				file = nameidata_to_filp(&nd, O_RDONLY);
 				if (!IS_ERR(file)) {
 					err = deny_write_access(file);
 					if (err) {
@@ -516,6 +515,7 @@ out:
 				return file;
 			}
 		}
+		release_open_intent(&nd);
 		path_release(&nd);
 	}
 	goto out;
diff --git a/fs/namei.c b/fs/namei.c
index aa62dbda93ac..0d1dff7d3d95 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -28,6 +28,7 @@
 #include <linux/syscalls.h>
 #include <linux/mount.h>
 #include <linux/audit.h>
+#include <linux/file.h>
 #include <asm/namei.h>
 #include <asm/uaccess.h>
 
@@ -317,6 +318,18 @@ void path_release_on_umount(struct nameidata *nd)
 	mntput_no_expire(nd->mnt);
 }
 
+/**
+ * release_open_intent - free up open intent resources
+ * @nd: pointer to nameidata
+ */
+void release_open_intent(struct nameidata *nd)
+{
+	if (nd->intent.open.file->f_dentry == NULL)
+		put_filp(nd->intent.open.file);
+	else
+		fput(nd->intent.open.file);
+}
+
 /*
  * Internal lookup() using the new generic dcache.
  * SMP-safe
@@ -1052,6 +1065,70 @@ out:
 	return retval;
 }
 
+static int __path_lookup_intent_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	struct file *filp = get_empty_filp();
+	int err;
+
+	if (filp == NULL)
+		return -ENFILE;
+	nd->intent.open.file = filp;
+	nd->intent.open.flags = open_flags;
+	nd->intent.open.create_mode = create_mode;
+	err = path_lookup(name, lookup_flags|LOOKUP_OPEN, nd);
+	if (IS_ERR(nd->intent.open.file)) {
+		if (err == 0) {
+			err = PTR_ERR(nd->intent.open.file);
+			path_release(nd);
+		}
+	} else if (err != 0)
+		release_open_intent(nd);
+	return err;
+}
+
+/**
+ * path_lookup_open - lookup a file path with open intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ */
+int path_lookup_open(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	return __path_lookup_intent_open(name, lookup_flags, nd,
+			open_flags, 0);
+}
+
+/**
+ * path_lookup_create - lookup a file path with open + create intent
+ * @name: pointer to file name
+ * @lookup_flags: lookup intent flags
+ * @nd: pointer to nameidata
+ * @open_flags: open intent flags
+ * @create_mode: create intent flags
+ */
+int path_lookup_create(const char *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags, int create_mode)
+{
+	return __path_lookup_intent_open(name, lookup_flags|LOOKUP_CREATE, nd,
+			open_flags, create_mode);
+}
+
+int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
+		struct nameidata *nd, int open_flags)
+{
+	char *tmp = getname(name);
+	int err = PTR_ERR(tmp);
+
+	if (!IS_ERR(tmp)) {
+		err = __path_lookup_intent_open(tmp, lookup_flags, nd, open_flags, 0);
+		putname(tmp);
+	}
+	return err;
+}
+
 /*
  * Restricted form of lookup. Doesn't follow links, single-component only,
  * needs parent already locked. Doesn't follow mounts.
@@ -1416,27 +1493,27 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
  */
 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 {
-	int acc_mode, error = 0;
+	int acc_mode, error;
 	struct path path;
 	struct dentry *dir;
 	int count = 0;
 
 	acc_mode = ACC_MODE(flag);
 
+	/* O_TRUNC implies we need access checks for write permissions */
+	if (flag & O_TRUNC)
+		acc_mode |= MAY_WRITE;
+
 	/* Allow the LSM permission hook to distinguish append 
 	   access from general write access. */
 	if (flag & O_APPEND)
 		acc_mode |= MAY_APPEND;
 
-	/* Fill in the open() intent data */
-	nd->intent.open.flags = flag;
-	nd->intent.open.create_mode = mode;
-
 	/*
 	 * The simplest case - just a plain lookup.
 	 */
 	if (!(flag & O_CREAT)) {
-		error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
+		error = path_lookup_open(pathname, lookup_flags(flag), nd, flag);
 		if (error)
 			return error;
 		goto ok;
@@ -1445,7 +1522,7 @@ int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
 	/*
 	 * Create - we need to know the parent.
 	 */
-	error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
+	error = path_lookup_create(pathname, LOOKUP_PARENT, nd, flag, mode);
 	if (error)
 		return error;
 
@@ -1520,6 +1597,8 @@ ok:
 exit_dput:
 	dput_path(&path, nd);
 exit:
+	if (!IS_ERR(nd->intent.open.file))
+		release_open_intent(nd);
 	path_release(nd);
 	return error;
 
diff --git a/fs/open.c b/fs/open.c
index f0d90cf0495c..8d06ec911fd9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -739,7 +739,8 @@ asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group)
 }
 
 static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
-					int flags, struct file *f)
+					int flags, struct file *f,
+					int (*open)(struct inode *, struct file *))
 {
 	struct inode *inode;
 	int error;
@@ -761,11 +762,14 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
 	f->f_op = fops_get(inode->i_fop);
 	file_move(f, &inode->i_sb->s_files);
 
-	if (f->f_op && f->f_op->open) {
-		error = f->f_op->open(inode,f);
+	if (!open && f->f_op)
+		open = f->f_op->open;
+	if (open) {
+		error = open(inode, f);
 		if (error)
 			goto cleanup_all;
 	}
+
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
 	file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
@@ -814,28 +818,75 @@ struct file *filp_open(const char * filename, int flags, int mode)
 {
 	int namei_flags, error;
 	struct nameidata nd;
-	struct file *f;
 
 	namei_flags = flags;
 	if ((namei_flags+1) & O_ACCMODE)
 		namei_flags++;
-	if (namei_flags & O_TRUNC)
-		namei_flags |= 2;
-
-	error = -ENFILE;
-	f = get_empty_filp();
-	if (f == NULL)
-		return ERR_PTR(error);
 
 	error = open_namei(filename, namei_flags, mode, &nd);
 	if (!error)
-		return __dentry_open(nd.dentry, nd.mnt, flags, f);
+		return nameidata_to_filp(&nd, flags);
 
-	put_filp(f);
 	return ERR_PTR(error);
 }
 EXPORT_SYMBOL(filp_open);
 
+/**
+ * lookup_instantiate_filp - instantiates the open intent filp
+ * @nd: pointer to nameidata
+ * @dentry: pointer to dentry
+ * @open: open callback
+ *
+ * Helper for filesystems that want to use lookup open intents and pass back
+ * a fully instantiated struct file to the caller.
+ * This function is meant to be called from within a filesystem's
+ * lookup method.
+ * Note that in case of error, nd->intent.open.file is destroyed, but the
+ * path information remains valid.
+ * If the open callback is set to NULL, then the standard f_op->open()
+ * filesystem callback is substituted.
+ */
+struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *))
+{
+	if (IS_ERR(nd->intent.open.file))
+		goto out;
+	if (IS_ERR(dentry))
+		goto out_err;
+	nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->mnt),
+					     nd->intent.open.flags - 1,
+					     nd->intent.open.file,
+					     open);
+out:
+	return nd->intent.open.file;
+out_err:
+	release_open_intent(nd);
+	nd->intent.open.file = (struct file *)dentry;
+	goto out;
+}
+EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
+
+/**
+ * nameidata_to_filp - convert a nameidata to an open filp.
+ * @nd: pointer to nameidata
+ * @flags: open flags
+ *
+ * Note that this function destroys the original nameidata
+ */
+struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+{
+	struct file *filp;
+
+	/* Pick up the filp from the open intent */
+	filp = nd->intent.open.file;
+	/* Has the filesystem initialised the file for us? */
+	if (filp->f_dentry == NULL)
+		filp = __dentry_open(nd->dentry, nd->mnt, flags, filp, NULL);
+	else
+		path_release(nd);
+	return filp;
+}
+
 struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 {
 	int error;
@@ -846,7 +897,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
 	if (f == NULL)
 		return ERR_PTR(error);
 
-	return __dentry_open(dentry, mnt, flags, f);
+	return __dentry_open(dentry, mnt, flags, f, NULL);
 }
 EXPORT_SYMBOL(dentry_open);
 
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 7db67b008cac..1c975d0d9e94 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -8,6 +8,7 @@ struct vfsmount;
 struct open_intent {
 	int	flags;
 	int	create_mode;
+	struct file *file;
 };
 
 enum { MAX_NESTED_LINKS = 5 };
@@ -65,6 +66,13 @@ extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
 extern void path_release(struct nameidata *);
 extern void path_release_on_umount(struct nameidata *);
 
+extern int __user_path_lookup_open(const char __user *, unsigned lookup_flags, struct nameidata *nd, int open_flags);
+extern int path_lookup_open(const char *, unsigned lookup_flags, struct nameidata *, int open_flags);
+extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
+		int (*open)(struct inode *, struct file *));
+extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
+extern void release_open_intent(struct nameidata *);
+
 extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
 extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
 
-- 
cgit v1.2.3-58-ga151


From 02a913a73b52071e93f4b76db3e86138d19efffd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:17 -0700
Subject: NFSv4: Eliminate nfsv4 open race...

 Make NFSv4 return the fully initialized file pointer with the
 stateid that it created in the lookup w/intent.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c            |  29 +++++-----
 fs/nfs/nfs3proc.c       |   2 +-
 fs/nfs/nfs4_fs.h        |   4 +-
 fs/nfs/nfs4proc.c       | 137 ++++++++++++++++++++----------------------------
 fs/nfs/proc.c           |   2 +-
 include/linux/nfs_xdr.h |   2 +-
 6 files changed, 74 insertions(+), 102 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c70eabd6d179..a6e251f21fd8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -914,7 +914,6 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct dentry *res = NULL;
-	struct inode *inode = NULL;
 	int error;
 
 	/* Check that we are indeed trying to open this file */
@@ -928,8 +927,10 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	/* Let vfs_create() deal with O_EXCL */
-	if (nd->intent.open.flags & O_EXCL)
-		goto no_entry;
+	if (nd->intent.open.flags & O_EXCL) {
+		d_add(dentry, NULL);
+		goto out;
+	}
 
 	/* Open the file on the server */
 	lock_kernel();
@@ -943,18 +944,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 
 	if (nd->intent.open.flags & O_CREAT) {
 		nfs_begin_data_update(dir);
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 		nfs_end_data_update(dir);
 	} else
-		inode = nfs4_atomic_open(dir, dentry, nd);
+		res = nfs4_atomic_open(dir, dentry, nd);
 	unlock_kernel();
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
+	if (IS_ERR(res)) {
+		error = PTR_ERR(res);
 		switch (error) {
 			/* Make a negative dentry */
 			case -ENOENT:
-				inode = NULL;
-				break;
+				res = NULL;
+				goto out;
 			/* This turned out not to be a regular file */
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
@@ -962,13 +963,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
-				res = ERR_PTR(error);
 				goto out;
 		}
-	}
-no_entry:
-	res = d_add_unique(dentry, inode);
-	if (res != NULL)
+	} else if (res != NULL)
 		dentry = res;
 	nfs_renew_times(dentry);
 	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1012,7 +1009,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
 	 */
 	lock_kernel();
 	verifier = nfs_save_change_attribute(dir);
-	ret = nfs4_open_revalidate(dir, dentry, openflags);
+	ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
 	if (!ret)
 		nfs_set_verifier(dentry, verifier);
 	unlock_kernel();
@@ -1135,7 +1132,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	lock_kernel();
 	nfs_begin_data_update(dir);
-	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
+	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
 	nfs_end_data_update(dir);
 	if (error != 0)
 		goto out_err;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index edc95514046d..df80477c5af4 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -299,7 +299,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
  */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		 int flags)
+		 int flags, struct nameidata *nd)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 2215cdee43ae..8a3788199052 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -215,8 +215,8 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
 extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
-extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
+extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 
 extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
 extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 35da15342e05..c9ecb8119632 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -47,6 +47,7 @@
 #include <linux/nfs_page.h>
 #include <linux/smp_lock.h>
 #include <linux/namei.h>
+#include <linux/mount.h>
 
 #include "nfs4_fs.h"
 #include "delegation.h"
@@ -947,12 +948,26 @@ out:
 	return status;
 }
 
-struct inode *
+static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+{
+	struct file *filp;
+
+	filp = lookup_instantiate_filp(nd, dentry, NULL);
+	if (!IS_ERR(filp)) {
+		struct nfs_open_context *ctx;
+		ctx = (struct nfs_open_context *)filp->private_data;
+		ctx->state = state;
+	} else
+		nfs4_close_state(state, nd->intent.open.flags);
+}
+
+struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
+	struct dentry *res;
 
 	if (nd->flags & LOOKUP_CREATE) {
 		attr.ia_mode = nd->intent.open.create_mode;
@@ -966,16 +981,23 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 
 	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
 	if (IS_ERR(cred))
-		return (struct inode *)cred;
+		return (struct dentry *)cred;
 	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
-	if (IS_ERR(state))
-		return (struct inode *)state;
-	return state->inode;
+	if (IS_ERR(state)) {
+		if (PTR_ERR(state) == -ENOENT)
+			d_add(dentry, NULL);
+		return (struct dentry *)state;
+	}
+	res = d_add_unique(dentry, state->inode);
+	if (res != NULL)
+		dentry = res;
+	nfs4_intent_set_file(nd, dentry, state);
+	return res;
 }
 
 int
-nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
+nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -988,18 +1010,30 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
 	if (IS_ERR(state))
 		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
 	put_rpccred(cred);
-	if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
-		return 1;
-	if (IS_ERR(state))
-		return 0;
+	if (IS_ERR(state)) {
+		switch (PTR_ERR(state)) {
+			case -EPERM:
+			case -EACCES:
+			case -EDQUOT:
+			case -ENOSPC:
+			case -EROFS:
+				lookup_instantiate_filp(nd, (struct dentry *)state, NULL);
+				return 1;
+			case -ENOENT:
+				if (dentry->d_inode == NULL)
+					return 1;
+		}
+		goto out_drop;
+	}
 	inode = state->inode;
+	iput(inode);
 	if (inode == dentry->d_inode) {
-		iput(inode);
+		nfs4_intent_set_file(nd, dentry, state);
 		return 1;
 	}
-	d_drop(dentry);
 	nfs4_close_state(state, openflags);
-	iput(inode);
+out_drop:
+	d_drop(dentry);
 	return 0;
 }
 
@@ -1500,7 +1534,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
 
 static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags)
+                 int flags, struct nameidata *nd)
 {
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
@@ -1522,13 +1556,13 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		struct nfs_fattr fattr;
 		status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
 		                     NFS_FH(state->inode), sattr, state);
-		if (status == 0) {
+		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
-			goto out;
-		}
-	} else if (flags != 0)
-		goto out;
-	nfs4_close_state(state, flags);
+	}
+	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+		nfs4_intent_set_file(nd, dentry, state);
+	else
+		nfs4_close_state(state, flags);
 out:
 	return status;
 }
@@ -2175,65 +2209,6 @@ nfs4_proc_renew(struct nfs4_client *clp)
 	return 0;
 }
 
-/*
- * We will need to arrange for the VFS layer to provide an atomic open.
- * Until then, this open method is prone to inefficiency and race conditions
- * due to the lookup, potential create, and open VFS calls from sys_open()
- * placed on the wire.
- */
-static int
-nfs4_proc_file_open(struct inode *inode, struct file *filp)
-{
-	struct dentry *dentry = filp->f_dentry;
-	struct nfs_open_context *ctx;
-	struct nfs4_state *state = NULL;
-	struct rpc_cred *cred;
-	int status = -ENOMEM;
-
-	dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
-	                       (int)dentry->d_parent->d_name.len,
-	                       dentry->d_parent->d_name.name,
-	                       (int)dentry->d_name.len, dentry->d_name.name);
-
-
-	/* Find our open stateid */
-	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
-	if (IS_ERR(cred))
-		return PTR_ERR(cred);
-	ctx = alloc_nfs_open_context(dentry, cred);
-	put_rpccred(cred);
-	if (unlikely(ctx == NULL))
-		return -ENOMEM;
-	status = -EIO; /* ERACE actually */
-	state = nfs4_find_state(inode, cred, filp->f_mode);
-	if (unlikely(state == NULL))
-		goto no_state;
-	ctx->state = state;
-	nfs4_close_state(state, filp->f_mode);
-	ctx->mode = filp->f_mode;
-	nfs_file_set_open_context(filp, ctx);
-	put_nfs_open_context(ctx);
-	if (filp->f_mode & FMODE_WRITE)
-		nfs_begin_data_update(inode);
-	return 0;
-no_state:
-	printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
-	put_nfs_open_context(ctx);
-	return status;
-}
-
-/*
- * Release our state
- */
-static int
-nfs4_proc_file_release(struct inode *inode, struct file *filp)
-{
-	if (filp->f_mode & FMODE_WRITE)
-		nfs_end_data_update(inode);
-	nfs_file_clear_open_context(filp);
-	return 0;
-}
-
 static inline int nfs4_server_supports_acls(struct nfs_server *server)
 {
 	return (server->caps & NFS_CAP_ACLS)
@@ -3145,8 +3120,8 @@ struct nfs_rpc_ops	nfs_v4_clientops = {
 	.read_setup	= nfs4_proc_read_setup,
 	.write_setup	= nfs4_proc_write_setup,
 	.commit_setup	= nfs4_proc_commit_setup,
-	.file_open      = nfs4_proc_file_open,
-	.file_release   = nfs4_proc_file_release,
+	.file_open      = nfs_open,
+	.file_release   = nfs_release,
 	.lock		= nfs4_proc_lock,
 	.clear_acl_cache = nfs4_zap_acl_attr,
 };
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index be23c3fb9260..8fef86523d7f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -216,7 +216,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
 
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-		int flags)
+		int flags, struct nameidata *nd)
 {
 	struct nfs_fh		fhandle;
 	struct nfs_fattr	fattr;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 57efcc27f20b..60086dac11d5 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -712,7 +712,7 @@ struct nfs_rpc_ops {
 	int	(*write)   (struct nfs_write_data *);
 	int	(*commit)  (struct nfs_write_data *);
 	int	(*create)  (struct inode *, struct dentry *,
-			    struct iattr *, int);
+			    struct iattr *, int, struct nameidata *);
 	int	(*remove)  (struct inode *, struct qstr *);
 	int	(*unlink_setup)  (struct rpc_message *,
 			    struct dentry *, struct qstr *);
-- 
cgit v1.2.3-58-ga151


From 6f926b5ba75c568296ec227e7d782db4ddbdca5c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:18 -0700
Subject: [NFS]: Check that the server returns a valid regular file to our OPEN
 request

 Since it appears that some servers don't...

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      |  4 +++-
 fs/nfs/nfs4proc.c | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a6e251f21fd8..ac331d2d4c4a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -957,10 +957,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
+			case -EISDIR:
+			case -ENOTDIR:
+				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
-			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c9ecb8119632..3db1c9f0b09c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -419,6 +419,22 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, stru
 	o_arg->clientid = sp->so_client->cl_clientid;
 
 	status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
+	if (status == 0) {
+		/* OPEN on anything except a regular file is disallowed in NFSv4 */
+		switch (o_res->f_attr->mode & S_IFMT) {
+			case S_IFREG:
+				break;
+			case S_IFLNK:
+				status = -ELOOP;
+				break;
+			case S_IFDIR:
+				status = -EISDIR;
+				break;
+			default:
+				status = -ENOTDIR;
+		}
+	}
+
 	nfs_increment_open_seqid(status, o_arg->seqid);
 	if (status != 0)
 		goto out;
-- 
cgit v1.2.3-58-ga151


From cdce5d6b94b6182f6d8a5b7b52923933e98cbc92 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:18 -0700
Subject: VFS: Make link_path_walk set LOOKUP_CONTINUE before calling
 permission().

 This will allow nfs_permission() to perform additional optimizations when
 walking the path, by folding the ACCESS(MAY_EXEC) call on the directory
 into the lookup revalidation.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index 0d1dff7d3d95..aaaa81036234 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -763,6 +763,7 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 		struct qstr this;
 		unsigned int c;
 
+		nd->flags |= LOOKUP_CONTINUE;
 		err = exec_permission_lite(inode, nd);
 		if (err == -EAGAIN) { 
 			err = permission(inode, MAY_EXEC, nd);
@@ -815,7 +816,6 @@ static fastcall int __link_path_walk(const char * name, struct nameidata *nd)
 			if (err < 0)
 				break;
 		}
-		nd->flags |= LOOKUP_CONTINUE;
 		/* This does the actual lookups.. */
 		err = do_lookup(nd, &this, &next);
 		if (err)
-- 
cgit v1.2.3-58-ga151


From cae7a073a4c5484cc5713eab606bf54b46724ab3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:19 -0700
Subject: NFSv4: Return delegation upon rename or removal of file.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c |  2 +-
 fs/nfs/delegation.h | 16 +++++++++++++++-
 fs/nfs/dir.c        |  3 +++
 fs/nfs/inode.c      |  3 +--
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 4a36839f0bbd..44135af9894c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -142,7 +142,7 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int nfs_inode_return_delegation(struct inode *inode)
+int __nfs_inode_return_delegation(struct inode *inode)
 {
 	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
 	struct nfs_inode *nfsi = NFS_I(inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 3f6c45a29d6a..8017846b561f 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -25,7 +25,7 @@ struct nfs_delegation {
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int nfs_inode_return_delegation(struct inode *inode);
+int __nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle);
@@ -47,11 +47,25 @@ static inline int nfs_have_delegation(struct inode *inode, int flags)
 		return 1;
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	int err = 0;
+
+	if (NFS_I(inode)->delegation != NULL)
+		err = __nfs_inode_return_delegation(inode);
+	return err;
+}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
 	return 0;
 }
+
+static inline int nfs_inode_return_delegation(struct inode *inode)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac331d2d4c4a..72f50c0117b1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -801,6 +801,7 @@ static int nfs_dentry_delete(struct dentry *dentry)
  */
 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
+	nfs_inode_return_delegation(inode);
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
 		lock_kernel();
 		inode->i_nlink--;
@@ -1329,6 +1330,7 @@ static int nfs_safe_remove(struct dentry *dentry)
 
 	nfs_begin_data_update(dir);
 	if (inode != NULL) {
+		nfs_inode_return_delegation(inode);
 		nfs_begin_data_update(inode);
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
 		/* The VFS may want to delete this inode */
@@ -1547,6 +1549,7 @@ go_ahead:
 		nfs_wb_all(old_inode);
 		shrink_dcache_parent(old_dentry);
 	}
+	nfs_inode_return_delegation(old_inode);
 
 	if (new_inode)
 		d_delete(new_dentry);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d7abaa00473a..6b0f7fe6bd1d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1671,8 +1671,7 @@ static void nfs4_clear_inode(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	/* If we are holding a delegation, return it! */
-	if (nfsi->delegation != NULL)
-		nfs_inode_return_delegation(inode);
+	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 	/* Now clear out any remaining state */
-- 
cgit v1.2.3-58-ga151


From 642ac54923e0291ae2c8e42edde18d8c9c0a3c84 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:19 -0700
Subject: NFSv4: Return delegations in case we're changing ACLs

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    | 5 +++++
 fs/nfs/nfs4proc.c | 1 +
 2 files changed, 6 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6b0f7fe6bd1d..65d5ab45ddc5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -853,6 +853,11 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 			filemap_fdatawait(inode->i_mapping);
 		nfs_wb_all(inode);
 	}
+	/*
+	 * Return any delegations if we're going to change ACLs
+	 */
+	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
+		nfs_inode_return_delegation(inode);
 	error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
 	if (error == 0)
 		nfs_refresh_inode(inode, &fattr);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3db1c9f0b09c..c10dcd12af51 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2405,6 +2405,7 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
 
 	if (!nfs4_server_supports_acls(server))
 		return -EOPNOTSUPP;
+	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 	ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
 	if (ret == 0)
-- 
cgit v1.2.3-58-ga151


From b8e5c4c2978fa666287525a9de2fa648a7581262 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:20 -0700
Subject: NFSv4: If a delegated open fails, ensure that we return the
 delegation

 Unless of course the open fails due to permission issues.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c10dcd12af51..a8be9af610ac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -658,6 +658,8 @@ out_err:
 	}
 	up_read(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
+	if (err != -EACCES)
+		nfs_inode_return_delegation(inode);
 	return err;
 }
 
-- 
cgit v1.2.3-58-ga151


From 550f57470c6506f5ef3c708335dea6bd48bf3dc4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:21 -0700
Subject: NFSv4: Ensure that we recover from the OPEN + OPEN_CONFIRM
 BAD_STATEID race

 If the server is in the unconfirmed OPEN state for a given open owner
 and receives a second OPEN for the same open owner, it will cancel the
 state of the first request and set up an OPEN_CONFIRM for the second.

 This can cause a race that is discussed in rfc3530 on page 181.

 The following patch allows the client to recover by retrying the
 original open request.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a8be9af610ac..ff378126ccd7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -785,6 +785,16 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 			exception.retry = 1;
 			continue;
 		}
+		/*
+		 * BAD_STATEID on OPEN means that the server cancelled our
+		 * state before it received the OPEN_CONFIRM.
+		 * Recover by retrying the request as per the discussion
+		 * on Page 181 of RFC3530.
+		 */
+		if (status == -NFS4ERR_BAD_STATEID) {
+			exception.retry = 1;
+			continue;
+		}
 		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
 					status, &exception));
 	} while (exception.retry);
-- 
cgit v1.2.3-58-ga151


From 4c780a4688b421baa896b59778c05d7e068e479f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:21 -0700
Subject: Fix Connectathon locking test failure

 We currently fail Connectathon test 6.10 in the case of 32-bit locks due
 to incorrect error checking.
 Also add support for l->l_len < 0 to 64-bit locks.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/locks.c | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

(limited to 'fs')

diff --git a/fs/locks.c b/fs/locks.c
index 7eb1d77b9204..a1e8b2248014 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -316,21 +316,22 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 	/* POSIX-1996 leaves the case l->l_len < 0 undefined;
 	   POSIX-2001 defines it. */
 	start += l->l_start;
-	end = start + l->l_len - 1;
-	if (l->l_len < 0) {
+	if (start < 0)
+		return -EINVAL;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		end = start + l->l_len - 1;
+		fl->fl_end = end;
+	} else if (l->l_len < 0) {
 		end = start - 1;
+		fl->fl_end = end;
 		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
 	}
-
-	if (start < 0)
-		return -EINVAL;
-	if (l->l_len > 0 && end < 0)
-		return -EOVERFLOW;
-
 	fl->fl_start = start;	/* we record the absolute position */
-	fl->fl_end = end;
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 	
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
@@ -362,14 +363,21 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		return -EINVAL;
 	}
 
-	if (((start += l->l_start) < 0) || (l->l_len < 0))
+	start += l->l_start;
+	if (start < 0)
 		return -EINVAL;
-	fl->fl_end = start + l->l_len - 1;
-	if (l->l_len > 0 && fl->fl_end < 0)
-		return -EOVERFLOW;
+	fl->fl_end = OFFSET_MAX;
+	if (l->l_len > 0) {
+		fl->fl_end = start + l->l_len - 1;
+	} else if (l->l_len < 0) {
+		fl->fl_end = start - 1;
+		start += l->l_len;
+		if (start < 0)
+			return -EINVAL;
+	}
 	fl->fl_start = start;	/* we record the absolute position */
-	if (l->l_len == 0)
-		fl->fl_end = OFFSET_MAX;
+	if (fl->fl_end < fl->fl_start)
+		return -EOVERFLOW;
 	
 	fl->fl_owner = current->files;
 	fl->fl_pid = current->tgid;
-- 
cgit v1.2.3-58-ga151


From 6fe43f9e3701f7a9f2be151a5e6cfe94b87e92f9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 14:20:22 -0700
Subject: NFS: Fix rename of directory onto empty directory

 If someone tries to rename a directory onto an empty directory, we
 currently fail and return EBUSY.
 This patch ensures that we try the rename if both source and target
 are directories, and that we fail with a correct error of EISDIR if
 the source is not a directory.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 72f50c0117b1..eb50c19fc253 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1511,9 +1511,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	 */
 	if (!new_inode)
 		goto go_ahead;
-	if (S_ISDIR(new_inode->i_mode))
-		goto out;
-	else if (atomic_read(&new_dentry->d_count) > 2) {
+	if (S_ISDIR(new_inode->i_mode)) {
+		error = -EISDIR;
+		if (!S_ISDIR(old_inode->i_mode))
+			goto out;
+	} else if (atomic_read(&new_dentry->d_count) > 2) {
 		int err;
 		/* copy the target dentry's name */
 		dentry = d_alloc(new_dentry->d_parent,
-- 
cgit v1.2.3-58-ga151


From 7f709a48fa798cfa0f2f777c8752e12995054f78 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 18 Oct 2005 23:19:39 -0700
Subject: NFSv4: Fix an oopsable condition in nfs_free_seqid

 Storing a pointer to the struct rpc_task in the nfs_seqid is broken
 since the nfs_seqid may be freed well after the task has been destroyed.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h   | 1 -
 fs/nfs/nfs4state.c | 9 +--------
 2 files changed, 1 insertion(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 8a3788199052..45bff1d1a513 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -112,7 +112,6 @@ struct nfs_seqid_counter {
 struct nfs_seqid {
 	struct list_head list;
 	struct nfs_seqid_counter *sequence;
-	struct rpc_task *task;
 };
 
 static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 23834c8fb740..da0861db57fb 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -676,7 +676,6 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
 	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (new != NULL) {
 		new->sequence = counter;
-		new->task = NULL;
 		spin_lock(&sequence->lock);
 		list_add_tail(&new->list, &sequence->list);
 		spin_unlock(&sequence->lock);
@@ -687,15 +686,10 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
 void nfs_free_seqid(struct nfs_seqid *seqid)
 {
 	struct rpc_sequence *sequence = seqid->sequence->sequence;
-	struct rpc_task *next = NULL;
 
 	spin_lock(&sequence->lock);
 	list_del(&seqid->list);
-	if (!list_empty(&sequence->list)) {
-		next = list_entry(sequence->list.next, struct nfs_seqid, list)->task;
-		if (next)
-			rpc_wake_up_task(next);
-	}
+	rpc_wake_up(&sequence->wait);
 	spin_unlock(&sequence->lock);
 	kfree(seqid);
 }
@@ -754,7 +748,6 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 
 	spin_lock(&sequence->lock);
 	if (sequence->list.next != &seqid->list) {
-		seqid->task = task;
 		rpc_sleep_on(&sequence->wait, task, NULL, NULL);
 		status = -EAGAIN;
 	}
-- 
cgit v1.2.3-58-ga151


From 8c233cf9c2ad6f49df753bdce84fddbf00bf6a75 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Thu, 13 Oct 2005 16:54:27 -0400
Subject: NFSv4: handle no acl attr

 Stop handing garbage to userspace in the case where a weird server clears the
 acl bit in the getattr return (despite the fact that they've already claimed
 acl support.)

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c5c75235c5b8..cd762648fa9a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3255,7 +3255,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		if (attrlen <= *acl_len)
 			xdr_read_pages(xdr, attrlen);
 		*acl_len = attrlen;
-	}
+	} else
+		status = -EOPNOTSUPP;
 
 out:
 	return status;
-- 
cgit v1.2.3-58-ga151


From 1d95db8e1688ed54e143a597c5570631a42fa594 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Thu, 13 Oct 2005 16:54:32 -0400
Subject: NFSv4: Fix acl buffer size

 resp_len is passed in as buffer size to decode routine; make sure it's
 set right in case where userspace provides less than a page's worth of
 buffer.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ff378126ccd7..2d9357cf3fc5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2357,7 +2357,7 @@ static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size
 			return -ENOMEM;
 		args.acl_pages[0] = localpage;
 		args.acl_pgbase = 0;
-		args.acl_len = PAGE_SIZE;
+		resp_len = args.acl_len = PAGE_SIZE;
 	} else {
 		resp_buf = buf;
 		buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
-- 
cgit v1.2.3-58-ga151


From 4e51336a00bdcb42960acca52c23e90e9f4e6959 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Oct 2005 14:22:41 -0700
Subject: NFSv4: Final tweak to sequence id

 Sacrifice queueing fairness for performance.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h   |  2 +-
 fs/nfs/nfs4state.c | 23 +++++++++++++----------
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 45bff1d1a513..5c0dd26d0985 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -110,8 +110,8 @@ struct nfs_seqid_counter {
 };
 
 struct nfs_seqid {
-	struct list_head list;
 	struct nfs_seqid_counter *sequence;
+	struct list_head list;
 };
 
 static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status)
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index da0861db57fb..c59ef90e956b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -670,15 +670,12 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
 
 struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
 {
-	struct rpc_sequence *sequence = counter->sequence;
 	struct nfs_seqid *new;
 
 	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (new != NULL) {
 		new->sequence = counter;
-		spin_lock(&sequence->lock);
-		list_add_tail(&new->list, &sequence->list);
-		spin_unlock(&sequence->lock);
+		INIT_LIST_HEAD(&new->list);
 	}
 	return new;
 }
@@ -687,10 +684,12 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
 {
 	struct rpc_sequence *sequence = seqid->sequence->sequence;
 
-	spin_lock(&sequence->lock);
-	list_del(&seqid->list);
-	rpc_wake_up(&sequence->wait);
-	spin_unlock(&sequence->lock);
+	if (!list_empty(&seqid->list)) {
+		spin_lock(&sequence->lock);
+		list_del(&seqid->list);
+		spin_unlock(&sequence->lock);
+	}
+	rpc_wake_up_next(&sequence->wait);
 	kfree(seqid);
 }
 
@@ -746,12 +745,16 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 	struct rpc_sequence *sequence = seqid->sequence->sequence;
 	int status = 0;
 
+	if (sequence->list.next == &seqid->list)
+		goto out;
 	spin_lock(&sequence->lock);
-	if (sequence->list.next != &seqid->list) {
+	if (!list_empty(&sequence->list)) {
 		rpc_sleep_on(&sequence->wait, task, NULL, NULL);
 		status = -EAGAIN;
-	}
+	} else
+		list_add(&seqid->list, &sequence->list);
 	spin_unlock(&sequence->lock);
+out:
 	return status;
 }
 
-- 
cgit v1.2.3-58-ga151


From ec073428281b401f1142cb84b277a5b00c7994c9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Oct 2005 14:22:47 -0700
Subject: NFSv4: Fix up locking for nfs4_state_owner  Signed-off-by: Trond
 Myklebust <Trond.Myklebust@netapp.com>

---
 fs/nfs/nfs4_fs.h   |  1 +
 fs/nfs/nfs4proc.c  |  2 ++
 fs/nfs/nfs4state.c | 20 +++++++++++++++-----
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5c0dd26d0985..78a53f5a9f18 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -126,6 +126,7 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
  * semantics by allowing the server to identify replayed requests.
  */
 struct nfs4_state_owner {
+	spinlock_t	     so_lock;
 	struct list_head     so_list;	 /* per-clientid list of state_owners */
 	struct nfs4_client   *so_client;
 	u32                  so_id;      /* 32-bit identifier, unique */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2d9357cf3fc5..9c1da34036aa 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -212,6 +212,7 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
 
 	open_flags &= (FMODE_READ|FMODE_WRITE);
 	/* Protect against nfs4_find_state() */
+	spin_lock(&state->owner->so_lock);
 	spin_lock(&inode->i_lock);
 	state->state |= open_flags;
 	/* NB! List reordering - see the reclaim code for why.  */
@@ -221,6 +222,7 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
 		state->nreaders++;
 	memcpy(&state->stateid, stateid, sizeof(state->stateid));
 	spin_unlock(&inode->i_lock);
+	spin_unlock(&state->owner->so_lock);
 }
 
 /*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c59ef90e956b..2d5a6a2b9dec 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -267,6 +267,7 @@ nfs4_alloc_state_owner(void)
 	sp = kzalloc(sizeof(*sp),GFP_KERNEL);
 	if (!sp)
 		return NULL;
+	spin_lock_init(&sp->so_lock);
 	INIT_LIST_HEAD(&sp->so_states);
 	INIT_LIST_HEAD(&sp->so_delegations);
 	rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue");
@@ -438,20 +439,23 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
 	if (state)
 		goto out;
 	new = nfs4_alloc_open_state();
+	spin_lock(&owner->so_lock);
 	spin_lock(&inode->i_lock);
 	state = __nfs4_find_state_byowner(inode, owner);
 	if (state == NULL && new != NULL) {
 		state = new;
-		/* Note: The reclaim code dictates that we add stateless
-		 * and read-only stateids to the end of the list */
-		list_add_tail(&state->open_states, &owner->so_states);
 		state->owner = owner;
 		atomic_inc(&owner->so_count);
 		list_add(&state->inode_states, &nfsi->open_states);
 		state->inode = igrab(inode);
 		spin_unlock(&inode->i_lock);
+		/* Note: The reclaim code dictates that we add stateless
+		 * and read-only stateids to the end of the list */
+		list_add_tail(&state->open_states, &owner->so_states);
+		spin_unlock(&owner->so_lock);
 	} else {
 		spin_unlock(&inode->i_lock);
+		spin_unlock(&owner->so_lock);
 		if (new)
 			nfs4_free_open_state(new);
 	}
@@ -468,12 +472,14 @@ void nfs4_put_open_state(struct nfs4_state *state)
 	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
 
-	if (!atomic_dec_and_lock(&state->count, &inode->i_lock))
+	if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
 		return;
+	spin_lock(&inode->i_lock);
 	if (!list_empty(&state->inode_states))
 		list_del(&state->inode_states);
-	spin_unlock(&inode->i_lock);
 	list_del(&state->open_states);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&owner->so_lock);
 	iput(inode);
 	BUG_ON (state->state != 0);
 	nfs4_free_open_state(state);
@@ -491,6 +497,7 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
+	spin_lock(&owner->so_lock);
 	spin_lock(&inode->i_lock);
 	if (mode & FMODE_READ)
 		state->nreaders--;
@@ -503,6 +510,7 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 		list_move_tail(&state->open_states, &owner->so_states);
 	}
 	spin_unlock(&inode->i_lock);
+	spin_unlock(&owner->so_lock);
 	newstate = 0;
 	if (state->state != 0) {
 		if (state->nreaders)
@@ -899,6 +907,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
 	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
 		sp->so_seqid.counter = 0;
 		sp->so_seqid.flags = 0;
+		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
 			list_for_each_entry(lock, &state->lock_states, ls_locks) {
 				lock->ls_seqid.counter = 0;
@@ -906,6 +915,7 @@ static void nfs4_state_mark_reclaim(struct nfs4_client *clp)
 				lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
 			}
 		}
+		spin_unlock(&sp->so_lock);
 	}
 }
 
-- 
cgit v1.2.3-58-ga151


From 34123da66e613602de5a886b05c875b6a91b8ed2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 19:10:09 -0400
Subject: NFS: Fix a bad cast in nfs3_read_done

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index df80477c5af4..e4a1cd48195e 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -735,7 +735,7 @@ extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 static void
 nfs3_read_done(struct rpc_task *task)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
-- 
cgit v1.2.3-58-ga151


From 0e574af1be5f569a5d7f2800333b0bfb358a5e34 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:38 -0400
Subject: NFS: Cleanup initialisation of struct nfs_fattr

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c           |  1 +
 fs/nfs/nfs3proc.c      | 58 +++++++++++++++++++++++++-------------------------
 fs/nfs/nfs4proc.c      | 43 +++++++++++++++++--------------------
 fs/nfs/proc.c          | 26 +++++++++++-----------
 fs/nfs/read.c          |  1 +
 fs/nfs/write.c         |  2 ++
 include/linux/nfs_fs.h |  5 +++++
 7 files changed, 71 insertions(+), 65 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index eb50c19fc253..b8a73045e9a0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -532,6 +532,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	my_entry.eof = 0;
 	my_entry.fh = &fh;
 	my_entry.fattr = &fattr;
+	nfs_fattr_init(&fattr);
 	desc->entry = &my_entry;
 
 	while(!desc->entry->eof) {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e4a1cd48195e..4b1b48b139f6 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -78,7 +78,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("%s: call  fsinfo\n", __FUNCTION__);
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
 	dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
 	if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
@@ -98,7 +98,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  getattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client, NFS3PROC_GETATTR,
 			  fhandle, fattr, 0);
 	dprintk("NFS reply getattr: %d\n", status);
@@ -117,7 +117,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	int	status;
 
 	dprintk("NFS call  setattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
@@ -143,8 +143,8 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
-	dir_attr.valid = 0;
-	fattr->valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
 	if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
 		status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
@@ -174,7 +174,6 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 	int status;
 
 	dprintk("NFS call  access\n");
-	fattr.valid = 0;
 
 	if (mode & MAY_READ)
 		arg.access |= NFS3_ACCESS_READ;
@@ -189,6 +188,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 		if (mode & MAY_EXEC)
 			arg.access |= NFS3_ACCESS_EXECUTE;
 	}
+	nfs_fattr_init(&fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	nfs_refresh_inode(inode, &fattr);
 	if (status == 0) {
@@ -217,7 +217,7 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
 	int			status;
 
 	dprintk("NFS call  readlink\n");
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
 			  &args, &fattr, 0);
 	nfs_refresh_inode(inode, &fattr);
@@ -240,7 +240,7 @@ static int nfs3_proc_read(struct nfs_read_data *rdata)
 
 	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0)
 		nfs_refresh_inode(inode, fattr);
@@ -263,7 +263,7 @@ static int nfs3_proc_write(struct nfs_write_data *wdata)
 
 	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
 	if (status >= 0)
 		nfs_refresh_inode(inode, fattr);
@@ -285,7 +285,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
 
 	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
 			(long long) cdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status >= 0)
 		nfs_refresh_inode(inode, fattr);
@@ -329,8 +329,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	sattr->ia_mode &= ~current->fs->umask;
 
 again:
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 
@@ -401,7 +401,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
 	int			status;
 
 	dprintk("NFS call  remove %s\n", name->name);
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply remove: %d\n", status);
@@ -422,7 +422,7 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr
 	ptr->arg.fh = NFS_FH(dir->d_inode);
 	ptr->arg.name = name->name;
 	ptr->arg.len = name->len;
-	ptr->res.valid = 0;
+	nfs_fattr_init(&ptr->res);
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE];
 	msg->rpc_argp = &ptr->arg;
 	msg->rpc_resp = &ptr->res;
@@ -465,8 +465,8 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	int			status;
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
-	old_dir_attr.valid = 0;
-	new_dir_attr.valid = 0;
+	nfs_fattr_init(&old_dir_attr);
+	nfs_fattr_init(&new_dir_attr);
 	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
 	nfs_refresh_inode(old_dir, &old_dir_attr);
 	nfs_refresh_inode(new_dir, &new_dir_attr);
@@ -491,8 +491,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 	int			status;
 
 	dprintk("NFS call  link %s\n", name->name);
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	nfs_refresh_inode(inode, &fattr);
@@ -524,8 +524,8 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	if (path->len > NFS3_MAXPATHLEN)
 		return -ENAMETOOLONG;
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	dir_attr.valid = 0;
-	fattr->valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply symlink: %d\n", status);
@@ -552,11 +552,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	int status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
-	dir_attr.valid = 0;
-	fattr.valid = 0;
 
 	sattr->ia_mode &= ~current->fs->umask;
 
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	if (status != 0)
@@ -582,7 +582,7 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
 	int			status;
 
 	dprintk("NFS call  rmdir %s\n", name->name);
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply rmdir: %d\n", status);
@@ -634,7 +634,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	dprintk("NFS call  readdir%s %d\n",
 			plus? "plus" : "", (unsigned int) cookie);
 
-	dir_attr.valid = 0;
+	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	dprintk("NFS reply readdir: %d\n", status);
@@ -676,8 +676,8 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	sattr->ia_mode &= ~current->fs->umask;
 
-	dir_attr.valid = 0;
-	fattr.valid = 0;
+	nfs_fattr_init(&dir_attr);
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
 	nfs_refresh_inode(dir, &dir_attr);
 	if (status != 0)
@@ -698,7 +698,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  fsstat\n");
-	stat->fattr->valid = 0;
+	nfs_fattr_init(stat->fattr);
 	status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	return status;
@@ -711,7 +711,7 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	return status;
@@ -724,7 +724,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  pathconf\n");
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
 	dprintk("NFS reply pathconf: %d\n", status);
 	return status;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9c1da34036aa..2a759e8e387c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -494,9 +494,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct nfs_fattr        f_attr = {
-		.valid = 0,
-	};
+	struct nfs_fattr        f_attr;
 	struct nfs_openargs o_arg = {
 		.fh = NFS_FH(dir),
 		.open_flags = state->state,
@@ -522,6 +520,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	status = -ENOMEM;
 	if (o_arg.seqid == NULL)
 		goto out;
+	nfs_fattr_init(&f_attr);
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_nodeleg;
@@ -692,9 +691,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_client *clp = server->nfs4_state;
 	struct inode *inode = NULL;
 	int                     status;
-	struct nfs_fattr        f_attr = {
-		.valid          = 0,
-	};
+	struct nfs_fattr        f_attr;
 	struct nfs_openargs o_arg = {
 		.fh             = NFS_FH(dir),
 		.open_flags	= flags,
@@ -726,6 +723,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
 	if (o_arg.seqid == NULL)
 		return -ENOMEM;
+	nfs_fattr_init(&f_attr);
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_err;
@@ -824,7 +822,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
         };
 	int status;
 
-        fattr->valid = 0;
+	nfs_fattr_init(fattr);
 
 	if (state != NULL) {
 		msg.rpc_cred = state->owner->so_cred;
@@ -1107,13 +1105,12 @@ static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh
 static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fsinfo *info)
 {
-	struct nfs_fattr *	fattr = info->fattr;
 	struct nfs4_lookup_root_arg args = {
 		.bitmask = nfs4_fattr_bitmap,
 	};
 	struct nfs4_lookup_res res = {
 		.server = server,
-		.fattr = fattr,
+		.fattr = info->fattr,
 		.fh = fhandle,
 	};
 	struct rpc_message msg = {
@@ -1121,7 +1118,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -1184,7 +1181,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		q.len = p - q.name;
 
 		do {
-			fattr->valid = 0;
+			nfs_fattr_init(fattr);
 			status = nfs4_handle_exception(server,
 					rpc_call_sync(server->client, &msg, 0),
 					&exception);
@@ -1221,7 +1218,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = &res,
 	};
 	
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -1263,7 +1260,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	struct nfs4_state *state;
 	int status;
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	
 	cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
 	if (IS_ERR(cred))
@@ -1309,7 +1306,7 @@ static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		.rpc_resp = &res,
 	};
 	
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	
 	dprintk("NFS call  lookup %s\n", name->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
@@ -1458,7 +1455,7 @@ static int _nfs4_proc_read(struct nfs_read_data *rdata)
 	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, flags);
 	if (!status)
 		renew_lease(server, timestamp);
@@ -1495,7 +1492,7 @@ static int _nfs4_proc_write(struct nfs_write_data *wdata)
 	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, rpcflags);
 	dprintk("NFS reply write: %d\n", status);
 	return status;
@@ -1529,7 +1526,7 @@ static int _nfs4_proc_commit(struct nfs_write_data *cdata)
 	dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
 			(long long) cdata->args.offset);
 
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply commit: %d\n", status);
 	return status;
@@ -1769,7 +1766,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 	if (path->len > NFS4_MAXPATHLEN)
 		return -ENAMETOOLONG;
 	arg.u.symlink = path;
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (!status)
@@ -1818,7 +1815,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 	};
 	int			status;
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (!status) {
@@ -1916,7 +1913,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 	int			status;
 	int                     mode = sattr->ia_mode;
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 
 	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
 	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
@@ -1969,7 +1966,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 		.rpc_resp = fsstat,
 	};
 
-	fsstat->fattr->valid = 0;
+	nfs_fattr_init(fsstat->fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
@@ -2016,7 +2013,7 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
 
 static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
 {
-	fsinfo->fattr->valid = 0;
+	nfs_fattr_init(fsinfo->fattr);
 	return nfs4_do_fsinfo(server, fhandle, fsinfo);
 }
 
@@ -2039,7 +2036,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
 		return 0;
 	}
 
-	pathconf->fattr->valid = 0;
+	nfs_fattr_init(pathconf->fattr);
 	return rpc_call_sync(server->client, &msg, 0);
 }
 
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 8fef86523d7f..5ef28f08f424 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -61,7 +61,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 	int status;
 
 	dprintk("%s: call getattr\n", __FUNCTION__);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
 	dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
 	if (status)
@@ -93,7 +93,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  getattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(server->client, NFSPROC_GETATTR,
 				fhandle, fattr, 0);
 	dprintk("NFS reply getattr: %d\n", status);
@@ -112,7 +112,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	int	status;
 
 	dprintk("NFS call  setattr\n");
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
 	if (status == 0)
 		nfs_setattr_update_inode(inode, sattr);
@@ -136,7 +136,7 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
 	int			status;
 
 	dprintk("NFS call  lookup %s\n", name->name);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
 	dprintk("NFS reply lookup: %d\n", status);
 	return status;
@@ -174,7 +174,7 @@ static int nfs_proc_read(struct nfs_read_data *rdata)
 
 	dprintk("NFS call  read %d @ %Ld\n", rdata->args.count,
 			(long long) rdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0) {
 		nfs_refresh_inode(inode, fattr);
@@ -203,7 +203,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
 
 	dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
 			(long long) wdata->args.offset);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0) {
 		nfs_refresh_inode(inode, fattr);
@@ -232,7 +232,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	};
 	int			status;
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	dprintk("NFS call  create %s\n", dentry->d_name.name);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
 	if (status == 0)
@@ -273,12 +273,12 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */
 	}
 
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
 
 	if (status == -EINVAL && S_ISFIFO(mode)) {
 		sattr->ia_mode = mode;
-		fattr.valid = 0;
+		nfs_fattr_init(&fattr);
 		status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
 	}
 	if (status == 0)
@@ -391,7 +391,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	if (path->len > NFS2_MAXPATHLEN)
 		return -ENAMETOOLONG;
 	dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
-	fattr->valid = 0;
+	nfs_fattr_init(fattr);
 	fhandle->size = 0;
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
 	dprintk("NFS reply symlink: %d\n", status);
@@ -416,7 +416,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	int			status;
 
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
-	fattr.valid = 0;
+	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -484,7 +484,7 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  statfs\n");
-	stat->fattr->valid = 0;
+	nfs_fattr_init(stat->fattr);
 	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply statfs: %d\n", status);
 	if (status)
@@ -507,7 +507,7 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 	int	status;
 
 	dprintk("NFS call  fsinfo\n");
-	info->fattr->valid = 0;
+	nfs_fattr_init(info->fattr);
 	status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
 	dprintk("NFS reply fsinfo: %d\n", status);
 	if (status)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9758ebd49905..43b03b19731b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -215,6 +215,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
 	data->res.eof     = 0;
+	nfs_fattr_init(&data->fattr);
 
 	NFS_PROTO(inode)->read_setup(data);
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5130eda231d7..819a65f5071f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -870,6 +870,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
 	data->res.verf    = &data->verf;
+	nfs_fattr_init(&data->fattr);
 
 	NFS_PROTO(inode)->write_setup(data, how);
 
@@ -1237,6 +1238,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
+	nfs_fattr_init(&data->fattr);
 	
 	NFS_PROTO(inode)->commit_setup(data, how);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7bac2785c6e4..8120fd68dee5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -316,6 +316,11 @@ extern void nfs_file_clear_open_context(struct file *filp);
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
 
+static inline void nfs_fattr_init(struct nfs_fattr *fattr)
+{
+	fattr->valid = 0;
+}
+
 /*
  * linux/fs/nfs/file.c
  */
-- 
cgit v1.2.3-58-ga151


From 913a70fc170530f7e1ff0693595155457cc6d0ca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:38 -0400
Subject: NFS: Convert cache_change_attribute into a jiffy-based value

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 8 ++++----
 include/linux/nfs_fs.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 65d5ab45ddc5..449df8c8aa31 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1135,7 +1135,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	 * We may need to keep the attributes marked as invalid if
 	 * we raced with nfs_end_attr_update().
 	 */
-	if (verifier == nfsi->cache_change_attribute)
+	if (time_after_eq(verifier, nfsi->cache_change_attribute))
 		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
 	spin_unlock(&inode->i_lock);
 
@@ -1202,7 +1202,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 		if (S_ISDIR(inode->i_mode)) {
 			memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
 			/* This ensures we revalidate child dentries */
-			nfsi->cache_change_attribute++;
+			nfsi->cache_change_attribute = jiffies;
 		}
 		spin_unlock(&inode->i_lock);
 
@@ -1242,7 +1242,7 @@ void nfs_end_data_update(struct inode *inode)
 			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
 		spin_unlock(&inode->i_lock);
 	}
-	nfsi->cache_change_attribute ++;
+	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
 }
 
@@ -1391,7 +1391,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 		/* Do we perhaps have any outstanding writes? */
 		if (nfsi->npages == 0) {
 			/* No, but did we race with nfs_end_data_update()? */
-			if (verifier  ==  nfsi->cache_change_attribute) {
+			if (time_after_eq(verifier,  nfsi->cache_change_attribute)) {
 				inode->i_size = new_isize;
 				invalid |= NFS_INO_INVALID_DATA;
 			}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8120fd68dee5..abf890f5fbfb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -280,7 +280,7 @@ static inline long nfs_save_change_attribute(struct inode *inode)
 static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
 {
 	return !nfs_caches_unstable(inode)
-		&& chattr == NFS_I(inode)->cache_change_attribute;
+		&& time_after_eq(chattr, NFS_I(inode)->cache_change_attribute);
 }
 
 /*
-- 
cgit v1.2.3-58-ga151


From 33801147a8fda6b04d7e9afe1d42f1c01d3d6837 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:39 -0400
Subject: NFS: Optimise inode attribute cache updates

 Allow nfs_refresh_inode() also to update attributes on the inode if the
 RPC call was sent after the last call to nfs_update_inode().

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c          | 54 ++++++++++++++++++++++++++++++++++++-------------
 fs/nfs/nfs2xdr.c        |  1 -
 fs/nfs/nfs3xdr.c        |  1 -
 fs/nfs/nfs4xdr.c        |  4 +---
 include/linux/nfs_fs.h  |  2 ++
 include/linux/nfs_xdr.h |  2 +-
 6 files changed, 44 insertions(+), 20 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 449df8c8aa31..b7d4f8f13ac2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -785,7 +785,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		else
 			init_special_inode(inode, inode->i_mode, fattr->rdev);
 
-		nfsi->read_cache_jiffies = fattr->timestamp;
+		nfsi->read_cache_jiffies = fattr->time_start;
+		nfsi->last_updated = jiffies;
 		inode->i_atime = fattr->atime;
 		inode->i_mtime = fattr->mtime;
 		inode->i_ctime = fattr->ctime;
@@ -1120,14 +1121,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		goto out;
 	}
 
+	spin_lock(&inode->i_lock);
 	status = nfs_update_inode(inode, &fattr, verifier);
 	if (status) {
+		spin_unlock(&inode->i_lock);
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (long long)NFS_FILEID(inode), status);
 		goto out;
 	}
-	spin_lock(&inode->i_lock);
 	cache_validity = nfsi->cache_validity;
 	nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
 
@@ -1247,7 +1249,7 @@ void nfs_end_data_update(struct inode *inode)
 }
 
 /**
- * nfs_refresh_inode - verify consistency of the inode attribute cache
+ * nfs_check_inode_attributes - verify consistency of the inode attribute cache
  * @inode - pointer to inode
  * @fattr - updated attributes
  *
@@ -1255,13 +1257,12 @@ void nfs_end_data_update(struct inode *inode)
  * so that fattr carries weak cache consistency data, then it may
  * also update the ctime/mtime/change_attribute.
  */
-int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fattr)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_size, new_isize;
 	int data_unstable;
 
-	spin_lock(&inode->i_lock);
 
 	/* Are we in the process of updating data on the server? */
 	data_unstable = nfs_caches_unstable(inode);
@@ -1325,11 +1326,40 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	if (!timespec_equal(&inode->i_atime, &fattr->atime))
 		nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
-	nfsi->read_cache_jiffies = fattr->timestamp;
-	spin_unlock(&inode->i_lock);
+	nfsi->read_cache_jiffies = fattr->time_start;
 	return 0;
 }
 
+/**
+ * nfs_refresh_inode - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * Check that an RPC call that returned attributes has not overlapped with
+ * other recent updates of the inode metadata, then decide whether it is
+ * safe to do a full update of the inode attributes, or whether just to
+ * call nfs_check_inode_attributes.
+ */
+int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int status;
+
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+		return 0;
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+	if (nfs_verify_change_attribute(inode, fattr->time_start))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
+	if (time_after(fattr->time_start, nfsi->last_updated))
+		status = nfs_update_inode(inode, fattr, fattr->time_start);
+	else
+		status = nfs_check_inode_attributes(inode, fattr);
+
+	spin_unlock(&inode->i_lock);
+	return status;
+}
+
 /*
  * Many nfs protocol calls return the new file attributes after
  * an operation.  Here we update the inode to reflect the state
@@ -1365,20 +1395,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 		goto out_err;
 	}
 
-	spin_lock(&inode->i_lock);
-
 	/*
 	 * Make sure the inode's type hasn't changed.
 	 */
-	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
-		spin_unlock(&inode->i_lock);
+	if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
 		goto out_changed;
-	}
 
 	/*
 	 * Update the read time so we don't revalidate too often.
 	 */
-	nfsi->read_cache_jiffies = fattr->timestamp;
+	nfsi->read_cache_jiffies = fattr->time_start;
+	nfsi->last_updated = jiffies;
 
 	/* Are we racing with known updates of the metadata on the server? */
 	data_unstable = ! (nfs_verify_change_attribute(inode, verifier) ||
@@ -1467,7 +1494,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
 	if (!nfs_have_delegation(inode, FMODE_READ))
 		nfsi->cache_validity |= invalid;
 
-	spin_unlock(&inode->i_lock);
 	return 0;
  out_changed:
 	/*
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d91b69044a4d..59049e864ca7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -143,7 +143,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 		fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
 		fattr->rdev = 0;
 	}
-	fattr->timestamp = jiffies;
 	return p;
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index db4a904810a4..0498bd36602c 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -174,7 +174,6 @@ xdr_decode_fattr(u32 *p, struct nfs_fattr *fattr)
 
 	/* Update the mode bits */
 	fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
-	fattr->timestamp = jiffies;
 	return p;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cd762648fa9a..8b21de8a06fa 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2799,10 +2799,8 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
 		goto xdr_error;
 	if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
 		goto xdr_error;
-	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0) {
+	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
 		fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
-		fattr->timestamp = jiffies;
-	}
 xdr_error:
 	if (status != 0)
 		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index abf890f5fbfb..faeaad666ca8 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -141,6 +141,7 @@ struct nfs_inode {
 	unsigned long		attrtimeo_timestamp;
 	__u64			change_attr;		/* v4 only */
 
+	unsigned long		last_updated;
 	/* "Generation counter" for the attribute cache. This is
 	 * bumped whenever we update the metadata on the
 	 * server.
@@ -319,6 +320,7 @@ extern u32 root_nfs_parse_addr(char *name); /*__init*/
 static inline void nfs_fattr_init(struct nfs_fattr *fattr)
 {
 	fattr->valid = 0;
+	fattr->time_start = jiffies;
 }
 
 /*
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 60086dac11d5..aeaee7e7c51d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -41,7 +41,7 @@ struct nfs_fattr {
 	__u32			bitmap[2];	/* NFSv4 returned attribute bitmap */
 	__u64			change_attr;	/* NFSv4 change attribute */
 	__u64			pre_change_attr;/* pre-op NFSv4 change attribute */
-	unsigned long		timestamp;
+	unsigned long		time_start;
 };
 
 #define NFS_ATTR_WCC		0x0001		/* pre-op WCC data    */
-- 
cgit v1.2.3-58-ga151


From decf491f3076190262d4c649bed877650623903a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:39 -0400
Subject: NFS: Don't let nfs_end_data_update() clobber attribute update
 information

 Since we almost always call nfs_end_data_update() after we called
 nfs_refresh_inode(), we now end up marking the inode metadata
 as needing revalidation immediately after having updated it.

 This patch rearranges things so that we mark the inode as needing
 revalidation _before_ we call nfs_refresh_inode() on those operations
 that need it.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 38 ++++++++++++++++++++++++++++++++------
 fs/nfs/nfs3proc.c      | 30 +++++++++++++++---------------
 fs/nfs/nfs4proc.c      |  3 +++
 fs/nfs/proc.c          | 16 +++++++++++++---
 include/linux/nfs_fs.h | 15 ++++++++++-----
 5 files changed, 73 insertions(+), 29 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b7d4f8f13ac2..6b3156e15350 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1236,13 +1236,12 @@ void nfs_end_data_update(struct inode *inode)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (!nfs_have_delegation(inode, FMODE_READ)) {
-		/* Mark the attribute cache for revalidation */
-		spin_lock(&inode->i_lock);
-		nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-		/* Directories and symlinks: invalidate page cache too */
-		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		/* Directories and symlinks: invalidate page cache */
+		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) {
+			spin_lock(&inode->i_lock);
 			nfsi->cache_validity |= NFS_INO_INVALID_DATA;
-		spin_unlock(&inode->i_lock);
+			spin_unlock(&inode->i_lock);
+		}
 	}
 	nfsi->cache_change_attribute = jiffies;
 	atomic_dec(&nfsi->data_updates);
@@ -1360,6 +1359,33 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 	return status;
 }
 
+/**
+ * nfs_post_op_update_inode - try to update the inode attribute cache
+ * @inode - pointer to inode
+ * @fattr - updated attributes
+ *
+ * After an operation that has changed the inode metadata, mark the
+ * attribute cache as being invalid, then try to update it.
+ */
+int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	int status = 0;
+
+	spin_lock(&inode->i_lock);
+	if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) {
+		nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+		goto out;
+	}
+	status = nfs_update_inode(inode, fattr, fattr->time_start);
+	if (time_after_eq(fattr->time_start, nfsi->cache_change_attribute))
+		nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
+	nfsi->cache_change_attribute = jiffies;
+out:
+	spin_unlock(&inode->i_lock);
+	return status;
+}
+
 /*
  * Many nfs protocol calls return the new file attributes after
  * an operation.  Here we update the inode to reflect the state
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 4b1b48b139f6..92c870d19ccd 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -266,7 +266,7 @@ static int nfs3_proc_write(struct nfs_write_data *wdata)
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
 	if (status >= 0)
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 	dprintk("NFS reply write: %d\n", status);
 	return status < 0? status : wdata->res.count;
 }
@@ -288,7 +288,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
 	if (status >= 0)
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 	dprintk("NFS reply commit: %d\n", status);
 	return status;
 }
@@ -332,7 +332,7 @@ again:
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 
 	/* If the server doesn't support the exclusive creation semantics,
 	 * try again with simple 'guarded' mode. */
@@ -403,7 +403,7 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
 	dprintk("NFS call  remove %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
 }
@@ -439,7 +439,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 		return 1;
 	if (msg->rpc_argp) {
 		dir_attr = (struct nfs_fattr*)msg->rpc_resp;
-		nfs_refresh_inode(dir->d_inode, dir_attr);
+		nfs_post_op_update_inode(dir->d_inode, dir_attr);
 		kfree(msg->rpc_argp);
 	}
 	return 0;
@@ -468,8 +468,8 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	nfs_fattr_init(&old_dir_attr);
 	nfs_fattr_init(&new_dir_attr);
 	status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
-	nfs_refresh_inode(old_dir, &old_dir_attr);
-	nfs_refresh_inode(new_dir, &new_dir_attr);
+	nfs_post_op_update_inode(old_dir, &old_dir_attr);
+	nfs_post_op_update_inode(new_dir, &new_dir_attr);
 	dprintk("NFS reply rename: %d\n", status);
 	return status;
 }
@@ -494,8 +494,8 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
-	nfs_refresh_inode(inode, &fattr);
+	nfs_post_op_update_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(inode, &fattr);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
 }
@@ -527,7 +527,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -558,7 +558,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
 	status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -584,7 +584,7 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
 	dprintk("NFS call  rmdir %s\n", name->name);
 	nfs_fattr_init(&dir_attr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
 }
@@ -679,7 +679,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	nfs_fattr_init(&dir_attr);
 	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
-	nfs_refresh_inode(dir, &dir_attr);
+	nfs_post_op_update_inode(dir, &dir_attr);
 	if (status != 0)
 		goto out;
 	status = nfs_instantiate(dentry, &fh, &fattr);
@@ -775,7 +775,7 @@ nfs3_write_done(struct rpc_task *task)
 		return;
 	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode(data->inode, data->res.fattr);
 	nfs_writeback_done(task);
 }
 
@@ -819,7 +819,7 @@ nfs3_commit_done(struct rpc_task *task)
 		return;
 	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode(data->inode, data->res.fattr);
 	nfs_commit_done(task);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2a759e8e387c..3274f2d354f3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -187,8 +187,11 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 	if (cinfo->before == nfsi->change_attr && cinfo->atomic)
 		nfsi->change_attr = cinfo->after;
+	spin_unlock(&inode->i_lock);
 }
 
 /* Helper for asynchronous RPC calls */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 5ef28f08f424..a48a003242c0 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -206,7 +206,7 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
 	if (status >= 0) {
-		nfs_refresh_inode(inode, fattr);
+		nfs_post_op_update_inode(inode, fattr);
 		wdata->res.count = wdata->args.count;
 		wdata->verf.committed = NFS_FILE_SYNC;
 	}
@@ -275,6 +275,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
 	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+	nfs_mark_for_revalidate(dir);
 
 	if (status == -EINVAL && S_ISFIFO(mode)) {
 		sattr->ia_mode = mode;
@@ -305,6 +306,7 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
 
 	dprintk("NFS call  remove %s\n", name->name);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nfs_mark_for_revalidate(dir);
 
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
@@ -331,8 +333,10 @@ nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 {
 	struct rpc_message *msg = &task->tk_msg;
 	
-	if (msg->rpc_argp)
+	if (msg->rpc_argp) {
+		nfs_mark_for_revalidate(dir->d_inode);
 		kfree(msg->rpc_argp);
+	}
 	return 0;
 }
 
@@ -352,6 +356,8 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
 
 	dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
 	status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+	nfs_mark_for_revalidate(old_dir);
+	nfs_mark_for_revalidate(new_dir);
 	dprintk("NFS reply rename: %d\n", status);
 	return status;
 }
@@ -369,6 +375,7 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 
 	dprintk("NFS call  link %s\n", name->name);
 	status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply link: %d\n", status);
 	return status;
 }
@@ -394,6 +401,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
 	nfs_fattr_init(fattr);
 	fhandle->size = 0;
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply symlink: %d\n", status);
 	return status;
 }
@@ -418,6 +426,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
 	nfs_fattr_init(&fattr);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+	nfs_mark_for_revalidate(dir);
 	if (status == 0)
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	dprintk("NFS reply mkdir: %d\n", status);
@@ -436,6 +445,7 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
 
 	dprintk("NFS call  rmdir %s\n", name->name);
 	status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+	nfs_mark_for_revalidate(dir);
 	dprintk("NFS reply rmdir: %d\n", status);
 	return status;
 }
@@ -579,7 +589,7 @@ nfs_write_done(struct rpc_task *task)
 	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
 
 	if (task->tk_status >= 0)
-		nfs_refresh_inode(data->inode, data->res.fattr);
+		nfs_post_op_update_inode(data->inode, data->res.fattr);
 	nfs_writeback_done(task);
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index faeaad666ca8..325fe7ae49bb 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -241,13 +241,17 @@ static inline int nfs_caches_unstable(struct inode *inode)
 	return atomic_read(&NFS_I(inode)->data_updates) != 0;
 }
 
+static inline void nfs_mark_for_revalidate(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
+	spin_unlock(&inode->i_lock);
+}
+
 static inline void NFS_CACHEINV(struct inode *inode)
 {
-	if (!nfs_caches_unstable(inode)) {
-		spin_lock(&inode->i_lock);
-		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
-		spin_unlock(&inode->i_lock);
-	}
+	if (!nfs_caches_unstable(inode))
+		nfs_mark_for_revalidate(inode);
 }
 
 static inline int nfs_server_capable(struct inode *inode, int cap)
@@ -291,6 +295,7 @@ extern void nfs_zap_caches(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
+extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
 extern int nfs_permission(struct inode *, int, struct nameidata *);
 extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *);
-- 
cgit v1.2.3-58-ga151


From 0c70b50150cfb0b43ff500a8a394a52b4d5f1350 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Tue, 25 Oct 2005 11:48:36 -0400
Subject: NFS: nfs_lookup doesn't need to revalidate the parent directory's
 inode

 nfs_lookup() used to consult a lookup cache before trying an actual wire
 lookup operation.  The lookup cache would be invalid, of course, if the
 parent directory's mtime had changed, so nfs_lookup performed an inode
 revalidation on the parent.

 Since nfs_lookup() doesn't use a cache anymore, the revalidation is no
 longer necessary.  There are cases where it will generate a lot of
 unnecessary GETATTR traffic.

 See http://bugzilla.linux-nfs.org/show_bug.cgi?id=9

 Test-plan:
 Use lndir and "rm -rf" and watch for excess GETATTR traffic or application
 level errors.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index b8a73045e9a0..ce8f77dadff9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -853,12 +853,6 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 	dentry->d_op = NFS_PROTO(dir)->dentry_ops;
 
 	lock_kernel();
-	/* Revalidate parent directory attribute cache */
-	error = nfs_revalidate_inode(NFS_SERVER(dir), dir);
-	if (error < 0) {
-		res = ERR_PTR(error);
-		goto out_unlock;
-	}
 
 	/* If we're doing an exclusive create, optimize away the lookup */
 	if (nfs_is_exclusive_create(dir, nd))
-- 
cgit v1.2.3-58-ga151


From 56ae19f38f10aad4f27f7e12138a29b295dff07a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:40 -0400
Subject: NFSv4: Add directory post-op attributes to the CREATE operations.

 Since the directory attributes change every time we CREATE a file,
 we might as well pick up the new directory attributes in the same
 compound.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 28 ++++++++++++++++----
 fs/nfs/nfs4xdr.c        | 69 ++++++++++++++++++++++++++++++++++++++++++-------
 include/linux/nfs_xdr.h |  2 ++
 3 files changed, 84 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3274f2d354f3..f363fd6c7f4d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -443,7 +443,11 @@ static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner  *sp, stru
 	nfs_increment_open_seqid(status, o_arg->seqid);
 	if (status != 0)
 		goto out;
-	update_changeattr(dir, &o_res->cinfo);
+	if (o_arg->open_flags & O_CREAT) {
+		update_changeattr(dir, &o_res->cinfo);
+		nfs_post_op_update_inode(dir, o_res->dir_attr);
+	} else
+		nfs_refresh_inode(dir, o_res->dir_attr);
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
 		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
 				sp, &o_res->stateid, o_arg->seqid);
@@ -497,7 +501,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
-	struct nfs_fattr        f_attr;
+	struct nfs_fattr f_attr, dir_attr;
 	struct nfs_openargs o_arg = {
 		.fh = NFS_FH(dir),
 		.open_flags = state->state,
@@ -507,6 +511,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	};
 	struct nfs_openres o_res = {
 		.f_attr = &f_attr,
+		.dir_attr = &dir_attr,
 		.server = server,
 	};
 	int status = 0;
@@ -524,6 +529,7 @@ static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
 	if (o_arg.seqid == NULL)
 		goto out;
 	nfs_fattr_init(&f_attr);
+	nfs_fattr_init(&dir_attr);
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_nodeleg;
@@ -694,7 +700,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	struct nfs4_client *clp = server->nfs4_state;
 	struct inode *inode = NULL;
 	int                     status;
-	struct nfs_fattr        f_attr;
+	struct nfs_fattr f_attr, dir_attr;
 	struct nfs_openargs o_arg = {
 		.fh             = NFS_FH(dir),
 		.open_flags	= flags,
@@ -705,6 +711,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	};
 	struct nfs_openres o_res = {
 		.f_attr         = &f_attr,
+		.dir_attr	= &dir_attr,
 		.server         = server,
 	};
 
@@ -727,6 +734,7 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	if (o_arg.seqid == NULL)
 		return -ENOMEM;
 	nfs_fattr_init(&f_attr);
+	nfs_fattr_init(&dir_attr);
 	status = _nfs4_proc_open(dir, sp, &o_arg, &o_res);
 	if (status != 0)
 		goto out_err;
@@ -1746,6 +1754,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 		struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
+	struct nfs_fattr dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
@@ -1758,6 +1767,7 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 		.server = server,
 		.fh = fhandle,
 		.fattr = fattr,
+		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SYMLINK],
@@ -1770,10 +1780,12 @@ static int _nfs4_proc_symlink(struct inode *dir, struct qstr *name,
 		return -ENAMETOOLONG;
 	arg.u.symlink = path;
 	nfs_fattr_init(fattr);
+	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (!status)
 		update_changeattr(dir, &res.dir_cinfo);
+	nfs_post_op_update_inode(dir, res.dir_fattr);
 	return status;
 }
 
@@ -1797,7 +1809,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_fh fhandle;
-	struct nfs_fattr fattr;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
@@ -1810,6 +1822,7 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 		.server = server,
 		.fh = &fhandle,
 		.fattr = &fattr,
+		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
@@ -1819,10 +1832,12 @@ static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 	int			status;
 
 	nfs_fattr_init(&fattr);
+	nfs_fattr_init(&dir_fattr);
 	
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (!status) {
 		update_changeattr(dir, &res.dir_cinfo);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
 		status = nfs_instantiate(dentry, &fhandle, &fattr);
 	}
 	return status;
@@ -1895,7 +1910,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs_fh fh;
-	struct nfs_fattr fattr;
+	struct nfs_fattr fattr, dir_fattr;
 	struct nfs4_create_arg arg = {
 		.dir_fh = NFS_FH(dir),
 		.server = server,
@@ -1907,6 +1922,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 		.server = server,
 		.fh = &fh,
 		.fattr = &fattr,
+		.dir_fattr = &dir_fattr,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
@@ -1917,6 +1933,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 	int                     mode = sattr->ia_mode;
 
 	nfs_fattr_init(&fattr);
+	nfs_fattr_init(&dir_fattr);
 
 	BUG_ON(!(sattr->ia_valid & ATTR_MODE));
 	BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
@@ -1938,6 +1955,7 @@ static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry,
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	if (status == 0) {
 		update_changeattr(dir, &res.dir_cinfo);
+		nfs_post_op_update_inode(dir, res.dir_fattr);
 		status = nfs_instantiate(dentry, &fh, &fattr);
 	}
 	return status;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8b21de8a06fa..7f91d613d31a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -95,6 +95,8 @@ static int nfs_stat_to_errno(int);
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
 #define encode_savefh_maxsz     (op_encode_hdr_maxsz)
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
+#define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
+#define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
 #define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
 #define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
 #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
@@ -336,14 +338,20 @@ static int nfs_stat_to_errno(int);
 				decode_getfh_maxsz)
 #define NFS4_enc_create_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
 				encode_create_maxsz + \
+				encode_getfh_maxsz + \
 				encode_getattr_maxsz + \
-				encode_getfh_maxsz)
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_create_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
 				decode_create_maxsz + \
+				decode_getfh_maxsz + \
 				decode_getattr_maxsz + \
-				decode_getfh_maxsz)
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_pathconf_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
@@ -1112,6 +1120,17 @@ static int encode_renew(struct xdr_stream *xdr, const struct nfs4_client *client
 	return 0;
 }
 
+static int
+encode_restorefh(struct xdr_stream *xdr)
+{
+	uint32_t *p;
+
+	RESERVE_SPACE(4);
+	WRITE32(OP_RESTOREFH);
+
+	return 0;
+}
+
 static int
 encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
 {
@@ -1358,7 +1377,7 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1366,10 +1385,16 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, const struct n
 	encode_compound_hdr(&xdr, &hdr);
 	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
 		goto out;
+	if ((status = encode_savefh(&xdr)) != 0)
+		goto out;
 	if ((status = encode_create(&xdr, args)) != 0)
 		goto out;
 	if ((status = encode_getfh(&xdr)) != 0)
 		goto out;
+	if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
+		goto out;
+	if ((status = encode_restorefh(&xdr)) != 0)
+		goto out;
 	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
@@ -1429,7 +1454,7 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1439,6 +1464,9 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
+	if (status)
+		goto out;
+	status = encode_savefh(&xdr);
 	if (status)
 		goto out;
 	status = encode_open(&xdr, args);
@@ -1448,6 +1476,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_opena
 	if (status)
 		goto out;
 	status = encode_getfattr(&xdr, args->bitmask);
+	if (status)
+		goto out;
+	status = encode_restorefh(&xdr);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -3218,6 +3252,12 @@ static int decode_renew(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_RENEW);
 }
 
+static int
+decode_restorefh(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_RESTOREFH);
+}
+
 static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		size_t *acl_len)
 {
@@ -3510,13 +3550,17 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
+	if ((status = decode_savefh(&xdr)) != 0)
+		goto out;
 	if ((status = decode_create(&xdr,&res->dir_cinfo)) != 0)
 		goto out;
 	if ((status = decode_getfh(&xdr, res->fh)) != 0)
 		goto out;
-	status = decode_getfattr(&xdr, res->fattr, res->server);
-	if (status == NFS4ERR_DELAY)
-		status = 0;
+	if (decode_getfattr(&xdr, res->fattr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->dir_fattr, res->server);
 out:
 	return status;
 }
@@ -3654,15 +3698,20 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_ope
         status = decode_putfh(&xdr);
         if (status)
                 goto out;
+        status = decode_savefh(&xdr);
+	if (status)
+		goto out;
         status = decode_open(&xdr, res);
         if (status)
                 goto out;
 	status = decode_getfh(&xdr, &res->fh);
         if (status)
 		goto out;
-	status = decode_getfattr(&xdr, res->f_attr, res->server);
-	if (status == NFS4ERR_DELAY)
-		status = 0;
+	if (decode_getfattr(&xdr, res->f_attr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->dir_attr, res->server);
 out:
         return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index aeaee7e7c51d..6485b8b41b83 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -124,6 +124,7 @@ struct nfs_openres {
 	struct nfs4_change_info	cinfo;
 	__u32                   rflags;
 	struct nfs_fattr *      f_attr;
+	struct nfs_fattr *      dir_attr;
 	const struct nfs_server *server;
 	int			delegation_type;
 	nfs4_stateid		delegation;
@@ -540,6 +541,7 @@ struct nfs4_create_res {
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
 	struct nfs4_change_info		dir_cinfo;
+	struct nfs_fattr *		dir_fattr;
 };
 
 struct nfs4_fsinfo_arg {
-- 
cgit v1.2.3-58-ga151


From 3338c143b4fde2d256016b63043ec8e2c93eba19 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:41 -0400
Subject: NFS: Optimise attribute revalidation on close().

 Only force a getattr in nfs_file_flush() if the attribute
 cache is stale.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 572d8593486f..57d3e77d97ee 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -205,8 +205,8 @@ nfs_file_flush(struct file *file)
 	if (!status) {
 		status = ctx->error;
 		ctx->error = 0;
-		if (!status && !nfs_have_delegation(inode, FMODE_READ))
-			__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+		if (!status)
+			nfs_revalidate_inode(NFS_SERVER(inode), inode);
 	}
 	unlock_kernel();
 	return status;
-- 
cgit v1.2.3-58-ga151


From 516a6af641bb50c608329a5bd751acd0d65cc4ab Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:41 -0400
Subject: NFS: Add optional post-op getattr instruction to the NFSv4 file
 close.

 "Optional" means that the close call will not fail if the getattr
 at the end of the compound fails.
 If it does succeed, try to refresh inode attributes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       |  9 ++++++++-
 fs/nfs/nfs4xdr.c        | 34 ++++++++++++++++++++++++++++------
 include/linux/nfs_xdr.h |  3 +++
 3 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f363fd6c7f4d..7be3d2d15d6f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -865,6 +865,7 @@ struct nfs4_closedata {
 	struct nfs4_state *state;
 	struct nfs_closeargs arg;
 	struct nfs_closeres res;
+	struct nfs_fattr fattr;
 };
 
 static void nfs4_free_closedata(struct nfs4_closedata *calldata)
@@ -904,6 +905,7 @@ static void nfs4_close_done(struct rpc_task *task)
 				return;
 			}
 	}
+	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
 	state->state = calldata->arg.open_flags;
 	nfs4_free_closedata(calldata);
 }
@@ -941,6 +943,7 @@ static void nfs4_close_begin(struct rpc_task *task)
 		rpc_exit(task, 0);
 		return;
 	}
+	nfs_fattr_init(calldata->res.fattr);
 	if (mode != 0)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 	calldata->arg.open_flags = mode;
@@ -960,6 +963,7 @@ static void nfs4_close_begin(struct rpc_task *task)
  */
 int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode) 
 {
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_closedata *calldata;
 	int status = -ENOMEM;
 
@@ -974,8 +978,11 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
 		goto out_free_calldata;
+	calldata->arg.bitmask = server->attr_bitmask;
+	calldata->res.fattr = &calldata->fattr;
+	calldata->res.server = server;
 
-	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_close_begin,
+	status = nfs4_call_async(server->client, nfs4_close_begin,
 			nfs4_close_done, calldata);
 	if (status == 0)
 		goto out;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 7f91d613d31a..cd9e26cfa868 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -198,17 +198,21 @@ static int nfs_stat_to_errno(int);
 #define NFS4_enc_open_downgrade_sz \
 				(compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + 7)
+                                op_encode_hdr_maxsz + 7 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_open_downgrade_sz \
 				(compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4)
+                                op_decode_hdr_maxsz + 4 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_close_sz       (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + 5)
+                                op_encode_hdr_maxsz + 5 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_close_sz       (compound_decode_hdr_maxsz + \
                                 decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4)
+                                op_decode_hdr_maxsz + 4 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_setattr_sz     (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + 4 + \
@@ -1433,7 +1437,7 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
 {
         struct xdr_stream xdr;
         struct compound_hdr hdr = {
-                .nops   = 2,
+                .nops   = 3,
         };
         int status;
 
@@ -1443,6 +1447,9 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_clos
         if(status)
                 goto out;
         status = encode_close(&xdr, args);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
         return status;
 }
@@ -1541,7 +1548,7 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops	= 2,
+		.nops	= 3,
 	};
 	int status;
 
@@ -1551,6 +1558,9 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct
 	if (status)
 		goto out;
 	status = encode_open_downgrade(&xdr, args);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -3403,6 +3413,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, stru
         if (status)
                 goto out;
         status = decode_open_downgrade(&xdr, res);
+	if (status != 0)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
         return status;
 }
@@ -3678,6 +3691,15 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_cl
         if (status)
                 goto out;
         status = decode_close(&xdr, res);
+	if (status != 0)
+		goto out;
+	/*
+	 * Note: Server may do delete on close for this file
+	 * 	in which case the getattr call will fail with
+	 * 	an ESTALE error. Shouldn't be a problem,
+	 * 	though, since fattr->valid will remain unset.
+	 */
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
         return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6485b8b41b83..4f03dc21cf4a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -153,10 +153,13 @@ struct nfs_closeargs {
 	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 	int			open_flags;
+	const u32 *		bitmask;
 };
 
 struct nfs_closeres {
 	nfs4_stateid            stateid;
+	struct nfs_fattr *	fattr;
+	const struct nfs_server *server;
 };
 /*
  *  * Arguments to the lock,lockt, and locku call.
-- 
cgit v1.2.3-58-ga151


From cf809556149f076b7a020c10e066b2b96e79b6a1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:42 -0400
Subject: NFS: Ensure that nfs_link() instantiates the dentry correctly

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce8f77dadff9..8272ed3fc707 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1432,17 +1432,14 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 		old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
 		dentry->d_parent->d_name.name, dentry->d_name.name);
 
-	/*
-	 * Drop the dentry in advance to force a new lookup.
-	 * Since nfs_proc_link doesn't return a file handle,
-	 * we can't use the existing dentry.
-	 */
 	lock_kernel();
-	d_drop(dentry);
-
 	nfs_begin_data_update(dir);
 	nfs_begin_data_update(inode);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+	if (error == 0) {
+		atomic_inc(&inode->i_count);
+		d_instantiate(dentry, inode);
+	}
 	nfs_end_data_update(inode);
 	nfs_end_data_update(dir);
 	unlock_kernel();
-- 
cgit v1.2.3-58-ga151


From 91ba2eeec5e8e86e054937eb3bf5aec5b22b1830 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:42 -0400
Subject: NFSv4: Add post-op attributes to nfs4_proc_link()

 Optimise attribute revalidation when hardlinking. Add post-op attributes
 for the directory and the original inode.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 22 +++++++++++++++++-----
 fs/nfs/nfs4xdr.c        | 34 ++++++++++++++++++++++++++++------
 include/linux/nfs_xdr.h |  9 +++++++++
 3 files changed, 54 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7be3d2d15d6f..04995e39e867 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1724,22 +1724,34 @@ static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 
 static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
 {
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_link_arg arg = {
 		.fh     = NFS_FH(inode),
 		.dir_fh = NFS_FH(dir),
 		.name   = name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_fattr fattr, dir_attr;
+	struct nfs4_link_res res = {
+		.server = server,
+		.fattr = &fattr,
+		.dir_attr = &dir_attr,
 	};
-	struct nfs4_change_info	cinfo = { };
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
 		.rpc_argp = &arg,
-		.rpc_resp = &cinfo,
+		.rpc_resp = &res,
 	};
 	int			status;
 
-	status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-	if (!status)
-		update_changeattr(dir, &cinfo);
+	nfs_fattr_init(res.fattr);
+	nfs_fattr_init(res.dir_attr);
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (!status) {
+		update_changeattr(dir, &res.cinfo);
+		nfs_post_op_update_inode(dir, res.dir_attr);
+		nfs_refresh_inode(inode, res.fattr);
+	}
 
 	return status;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cd9e26cfa868..f624b693ce21 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -324,12 +324,18 @@ static int nfs_stat_to_errno(int);
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
-				encode_link_maxsz)
+				encode_link_maxsz + \
+				decode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_dec_link_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
-				decode_link_maxsz)
+				decode_link_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_symlink_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_symlink_maxsz + \
@@ -1357,7 +1363,7 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1369,7 +1375,13 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, const struct nfs
 		goto out;
 	if ((status = encode_putfh(&xdr, args->dir_fh)) != 0)
 		goto out;
-	status = encode_link(&xdr, args->name);
+	if ((status = encode_link(&xdr, args->name)) != 0)
+		goto out;
+	if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
+		goto out;
+	if ((status = encode_restorefh(&xdr)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -3529,7 +3541,7 @@ out:
 /*
  * Decode LINK response
  */
-static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3544,7 +3556,17 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_ch
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
-	status = decode_link(&xdr, cinfo);
+	if ((status = decode_link(&xdr, &res->cinfo)) != 0)
+		goto out;
+	/*
+	 * Note order: OP_LINK leaves the directory as the current
+	 *             filehandle.
+	 */
+	if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 4f03dc21cf4a..89238b799cfd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -566,8 +566,17 @@ struct nfs4_link_arg {
 	const struct nfs_fh *		fh;
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
+	const u32 *			bitmask;
+};
+
+struct nfs4_link_res {
+	const struct nfs_server *	server;
+	struct nfs_fattr *		fattr;
+	struct nfs4_change_info		cinfo;
+	struct nfs_fattr *		dir_attr;
 };
 
+
 struct nfs4_lookup_arg {
 	const struct nfs_fh *		dir_fh;
 	const struct qstr *		name;
-- 
cgit v1.2.3-58-ga151


From 6caf2c8276d371679a798058e8fdf49f5ff831a3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:43 -0400
Subject: NFSv4: Add post-op attributes to nfs4_proc_rename()

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 15 +++++++++++++--
 fs/nfs/nfs4xdr.c        | 29 ++++++++++++++++++++++++-----
 include/linux/nfs_xdr.h |  4 ++++
 3 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 04995e39e867..f96bc12c0fa0 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1685,13 +1685,20 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 		struct inode *new_dir, struct qstr *new_name)
 {
+	struct nfs_server *server = NFS_SERVER(old_dir);
 	struct nfs4_rename_arg arg = {
 		.old_dir = NFS_FH(old_dir),
 		.new_dir = NFS_FH(new_dir),
 		.old_name = old_name,
 		.new_name = new_name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_fattr old_fattr, new_fattr;
+	struct nfs4_rename_res res = {
+		.server = server,
+		.old_fattr = &old_fattr,
+		.new_fattr = &new_fattr,
 	};
-	struct nfs4_rename_res res = { };
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
 		.rpc_argp = &arg,
@@ -1699,11 +1706,15 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
 	};
 	int			status;
 	
-	status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+	nfs_fattr_init(res.old_fattr);
+	nfs_fattr_init(res.new_fattr);
+	status = rpc_call_sync(server->client, &msg, 0);
 
 	if (!status) {
 		update_changeattr(old_dir, &res.old_cinfo);
+		nfs_post_op_update_inode(old_dir, res.old_fattr);
 		update_changeattr(new_dir, &res.new_cinfo);
+		nfs_post_op_update_inode(new_dir, res.new_fattr);
 	}
 	return status;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f624b693ce21..2a07755bd347 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -314,12 +314,18 @@ static int nfs_stat_to_errno(int);
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
 				encode_putfh_maxsz + \
-				encode_rename_maxsz)
+				encode_rename_maxsz + \
+				encode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_rename_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_savefh_maxsz + \
 				decode_putfh_maxsz + \
-				decode_rename_maxsz)
+				decode_rename_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_link_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
@@ -1339,7 +1345,7 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 4,
+		.nops = 7,
 	};
 	int status;
 
@@ -1351,7 +1357,13 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, const struct n
 		goto out;
 	if ((status = encode_putfh(&xdr, args->new_dir)) != 0)
 		goto out;
-	status = encode_rename(&xdr, args->old_name, args->new_name);
+	if ((status = encode_rename(&xdr, args->old_name, args->new_name)) != 0)
+		goto out;
+	if ((status = encode_getfattr(&xdr, args->bitmask)) != 0)
+		goto out;
+	if ((status = encode_restorefh(&xdr)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -3533,7 +3545,14 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
 		goto out;
 	if ((status = decode_putfh(&xdr)) != 0)
 		goto out;
-	status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo);
+	if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
+		goto out;
+	/* Current FH is target directory */
+	if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0)
+		goto out;
+	if ((status = decode_restorefh(&xdr)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->old_fattr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 89238b799cfd..6f0804280824 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -630,11 +630,15 @@ struct nfs4_rename_arg {
 	const struct nfs_fh *		new_dir;
 	const struct qstr *		old_name;
 	const struct qstr *		new_name;
+	const u32 *			bitmask;
 };
 
 struct nfs4_rename_res {
+	const struct nfs_server *	server;
 	struct nfs4_change_info		old_cinfo;
+	struct nfs_fattr *		old_fattr;
 	struct nfs4_change_info		new_cinfo;
+	struct nfs_fattr *		new_fattr;
 };
 
 struct nfs4_setclientid {
-- 
cgit v1.2.3-58-ga151


From 16e429596dec4d28e16812b3a9be27f18412c567 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:44 -0400
Subject: NFSv4: Add post-op attributes to nfs4_proc_remove()

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 27 +++++++++++++++++++++------
 fs/nfs/nfs4xdr.c        | 25 +++++++++++++++++--------
 include/linux/nfs_xdr.h |  7 +++++++
 3 files changed, 45 insertions(+), 14 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f96bc12c0fa0..bab47c4cb41c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1614,11 +1614,17 @@ out:
 
 static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 {
+	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_remove_arg args = {
 		.fh = NFS_FH(dir),
 		.name = name,
+		.bitmask = server->attr_bitmask,
+	};
+	struct nfs_fattr dir_attr;
+	struct nfs4_remove_res	res = {
+		.server = server,
+		.dir_attr = &dir_attr,
 	};
-	struct nfs4_change_info	res;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
 		.rpc_argp	= &args,
@@ -1626,9 +1632,12 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 	};
 	int			status;
 
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	if (status == 0)
-		update_changeattr(dir, &res);
+	nfs_fattr_init(res.dir_attr);
+	status = rpc_call_sync(server->client, &msg, 0);
+	if (status == 0) {
+		update_changeattr(dir, &res.cinfo);
+		nfs_post_op_update_inode(dir, res.dir_attr);
+	}
 	return status;
 }
 
@@ -1646,12 +1655,14 @@ static int nfs4_proc_remove(struct inode *dir, struct qstr *name)
 
 struct unlink_desc {
 	struct nfs4_remove_arg	args;
-	struct nfs4_change_info	res;
+	struct nfs4_remove_res	res;
+	struct nfs_fattr dir_attr;
 };
 
 static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
 		struct qstr *name)
 {
+	struct nfs_server *server = NFS_SERVER(dir->d_inode);
 	struct unlink_desc *up;
 
 	up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
@@ -1660,6 +1671,9 @@ static int nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir,
 	
 	up->args.fh = NFS_FH(dir->d_inode);
 	up->args.name = name;
+	up->args.bitmask = server->attr_bitmask;
+	up->res.server = server;
+	up->res.dir_attr = &up->dir_attr;
 	
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
 	msg->rpc_argp = &up->args;
@@ -1674,7 +1688,8 @@ static int nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
 	
 	if (msg->rpc_resp != NULL) {
 		up = container_of(msg->rpc_resp, struct unlink_desc, res);
-		update_changeattr(dir->d_inode, &up->res);
+		update_changeattr(dir->d_inode, &up->res.cinfo);
+		nfs_post_op_update_inode(dir->d_inode, up->res.dir_attr);
 		kfree(up);
 		msg->rpc_resp = NULL;
 		msg->rpc_argp = NULL;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2a07755bd347..3ee3a1669d28 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -306,10 +306,12 @@ static int nfs_stat_to_errno(int);
 				decode_getfh_maxsz)
 #define NFS4_enc_remove_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				encode_remove_maxsz)
+				encode_remove_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_remove_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 5)
+				op_decode_hdr_maxsz + 5 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_rename_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_savefh_maxsz + \
@@ -1327,14 +1329,18 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, const struct n
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
-	if ((status = encode_putfh(&xdr, args->fh)) == 0)
-		status = encode_remove(&xdr, args->name);
+	if ((status = encode_putfh(&xdr, args->fh)) != 0)
+		goto out;
+	if ((status = encode_remove(&xdr, args->name)) != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
 	return status;
 }
 
@@ -3512,7 +3518,7 @@ out:
 /*
  * Decode REMOVE response
  */
-static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_change_info *cinfo)
+static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3521,8 +3527,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
 	if ((status = decode_compound_hdr(&xdr, &hdr)) != 0)
 		goto out;
-	if ((status = decode_putfh(&xdr)) == 0)
-		status = decode_remove(&xdr, cinfo);
+	if ((status = decode_putfh(&xdr)) != 0)
+		goto out;
+	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
+		goto out;
+	decode_getfattr(&xdr, res->dir_attr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6f0804280824..deeba7e2c518 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -623,6 +623,13 @@ struct nfs4_readlink {
 struct nfs4_remove_arg {
 	const struct nfs_fh *		fh;
 	const struct qstr *		name;
+	const u32 *			bitmask;
+};
+
+struct nfs4_remove_res {
+	const struct nfs_server *	server;
+	struct nfs4_change_info		cinfo;
+	struct nfs_fattr *		dir_attr;
 };
 
 struct nfs4_rename_arg {
-- 
cgit v1.2.3-58-ga151


From 4f9838c7ecd14f31f701f64fa65ded132fc0db8a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:44 -0400
Subject: NFSv4: Add post-op attributes to NFSv4 write and commit callbacks.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 13 ++++++++++++-
 fs/nfs/nfs4xdr.c        | 28 ++++++++++++++++++++++------
 include/linux/nfs_xdr.h |  2 ++
 3 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bab47c4cb41c..933e13b383f8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2169,8 +2169,10 @@ nfs4_write_done(struct rpc_task *task)
 		rpc_restart_call(task);
 		return;
 	}
-	if (task->tk_status >= 0)
+	if (task->tk_status >= 0) {
 		renew_lease(NFS_SERVER(inode), data->timestamp);
+		nfs_post_op_update_inode(inode, data->res.fattr);
+	}
 	/* Call back common NFS writeback processing */
 	nfs_writeback_done(task);
 }
@@ -2186,6 +2188,7 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 		.rpc_cred = data->cred,
 	};
 	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	int stable;
 	int flags;
 	
@@ -2197,6 +2200,8 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 	} else
 		stable = NFS_UNSTABLE;
 	data->args.stable = stable;
+	data->args.bitmask = server->attr_bitmask;
+	data->res.server = server;
 
 	data->timestamp   = jiffies;
 
@@ -2218,6 +2223,8 @@ nfs4_commit_done(struct rpc_task *task)
 		rpc_restart_call(task);
 		return;
 	}
+	if (task->tk_status >= 0)
+		nfs_post_op_update_inode(inode, data->res.fattr);
 	/* Call back common NFS writeback processing */
 	nfs_commit_done(task);
 }
@@ -2233,8 +2240,12 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 		.rpc_cred = data->cred,
 	};	
 	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 	int flags;
 	
+	data->args.bitmask = server->attr_bitmask;
+	data->res.server = server;
+
 	/* Set the initial flags for the task.  */
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3ee3a1669d28..6f1bf182e0e0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -159,16 +159,20 @@ static int nfs_stat_to_errno(int);
 				op_decode_hdr_maxsz + 2)
 #define NFS4_enc_write_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				op_encode_hdr_maxsz + 8)
+				op_encode_hdr_maxsz + 8 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_write_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 4)
+				op_decode_hdr_maxsz + 4 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_commit_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				op_encode_hdr_maxsz + 3)
+				op_encode_hdr_maxsz + 3 + \
+				encode_getattr_maxsz)
 #define NFS4_dec_commit_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + 2)
+				op_decode_hdr_maxsz + 2 + \
+				decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
                                 op_encode_hdr_maxsz + \
@@ -1799,7 +1803,7 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
@@ -1809,6 +1813,9 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writ
 	if (status)
 		goto out;
 	status = encode_write(&xdr, args);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -1820,7 +1827,7 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
@@ -1830,6 +1837,9 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, uint32_t *p, struct nfs_wri
 	if (status)
 		goto out;
 	status = encode_commit(&xdr, args);
+	if (status)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
 out:
 	return status;
 }
@@ -4001,6 +4011,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_wr
 	if (status)
 		goto out;
 	status = decode_write(&xdr, res);
+	if (status)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 	if (!status)
 		status = res->count;
 out:
@@ -4024,6 +4037,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_w
 	if (status)
 		goto out;
 	status = decode_commit(&xdr, res);
+	if (status)
+		goto out;
+	decode_getfattr(&xdr, res->fattr, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index deeba7e2c518..40718669b9c8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -256,6 +256,7 @@ struct nfs_writeargs {
 	enum nfs3_stable_how	stable;
 	unsigned int		pgbase;
 	struct page **		pages;
+	const u32 *		bitmask;
 };
 
 struct nfs_writeverf {
@@ -267,6 +268,7 @@ struct nfs_writeres {
 	struct nfs_fattr *	fattr;
 	struct nfs_writeverf *	verf;
 	__u32			count;
+	const struct nfs_server *server;
 };
 
 /*
-- 
cgit v1.2.3-58-ga151


From 16c32b71bc53d6f7143702ebb264b4ef20d8f1e5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:45 -0400
Subject: NFSv4: Convert unnecessary XDR warning messages into dprintk()

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6f1bf182e0e0..fbbace8a30c4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2766,8 +2766,7 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re
 		goto xdr_error;
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 	
@@ -2800,8 +2799,7 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
 
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2826,8 +2824,7 @@ static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf
 
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2886,8 +2883,7 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
 	if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
 		fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -2920,8 +2916,7 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 
 	status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
-	if (status != 0)
-		printk(KERN_NOTICE "%s: xdr error %d!\n", __FUNCTION__, -status);
+	dprintk("%s: xdr returned %d!\n", __FUNCTION__, -status);
 	return status;
 }
 
@@ -3088,7 +3083,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
         p += bmlen;
 	return decode_delegation(xdr, res);
 xdr_error:
-	printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__);
+	dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
 	return -EIO;
 }
 
-- 
cgit v1.2.3-58-ga151


From bec273b491bd16351a9cdb8e9a51d30afa7fe9f4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 27 Oct 2005 22:12:45 -0400
Subject: NFS: Allow files that are open for write to invalidate caches

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6b3156e15350..f2781ca42761 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1057,15 +1057,11 @@ int nfs_open(struct inode *inode, struct file *filp)
 	ctx->mode = filp->f_mode;
 	nfs_file_set_open_context(filp, ctx);
 	put_nfs_open_context(ctx);
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		nfs_begin_data_update(inode);
 	return 0;
 }
 
 int nfs_release(struct inode *inode, struct file *filp)
 {
-	if ((filp->f_mode & FMODE_WRITE) != 0)
-		nfs_end_data_update(inode);
 	nfs_file_clear_open_context(filp);
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From 20e5c81fcff89535dced2ed71cf24c6c648ff40e Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Thu, 13 Oct 2005 21:48:42 +0200
Subject: [patch] remove gendisk->stamp_idle field

struct gendisk has these two fields: stamp, stamp_idle.  Update to
stamp_idle is always in sync with stamp and they are always the same.
Therefore, it does not add any value in having two fields tracking
same timestamp.  Suggest to remove it.

Also, we should only update gendisk stats with non-zero value.
Advantage is that we don't have to needlessly calculate memory address,
and then add zero to the content.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
---
 drivers/block/ll_rw_blk.c | 11 +++++------
 fs/partitions/check.c     |  2 +-
 include/linux/genhd.h     |  2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index baedac522945..c42071fd2e95 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -2433,13 +2433,12 @@ void disk_round_stats(struct gendisk *disk)
 {
 	unsigned long now = jiffies;
 
-	__disk_stat_add(disk, time_in_queue,
-			disk->in_flight * (now - disk->stamp));
+	if (disk->in_flight) {
+		__disk_stat_add(disk, time_in_queue,
+				disk->in_flight * (now - disk->stamp));
+		__disk_stat_add(disk, io_ticks, (now - disk->stamp));
+	}
 	disk->stamp = now;
-
-	if (disk->in_flight)
-		__disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
-	disk->stamp_idle = now;
 }
 
 /*
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 77e178f13162..1e848648a322 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -430,7 +430,7 @@ void del_gendisk(struct gendisk *disk)
 	disk->flags &= ~GENHD_FL_UP;
 	unlink_gendisk(disk);
 	disk_stat_set_all(disk, 0);
-	disk->stamp = disk->stamp_idle = 0;
+	disk->stamp = 0;
 
 	devfs_remove_disk(disk);
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 01796c41c951..142e1c1e0689 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -119,7 +119,7 @@ struct gendisk {
 	int policy;
 
 	atomic_t sync_io;		/* RAID */
-	unsigned long stamp, stamp_idle;
+	unsigned long stamp;
 	int in_flight;
 #ifdef	CONFIG_SMP
 	struct disk_stats *dkstats;
-- 
cgit v1.2.3-58-ga151


From af4ca457eaf2d6682059c18463eb106e2ce58198 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Oct 2005 02:55:38 -0400
Subject: [PATCH] gfp_t: infrastructure

Beginning of gfp_t annotations:

 - -Wbitwise added to CHECKFLAGS
 - old __bitwise renamed to __bitwise__
 - __bitwise defined to either __bitwise__ or nothing, depending on
   __CHECK_ENDIAN__ being defined
 - gfp_t switched from __nocast to __bitwise__
 - force cast to gfp_t added to __GFP_... constants
 - new helper - gfp_zone(); extracts zone bits out of gfp_t value and casts
   the result to int

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Makefile              |  2 +-
 fs/buffer.c           |  2 +-
 include/linux/gfp.h   | 39 ++++++++++++++++++++-------------------
 include/linux/types.h |  9 +++++++--
 mm/mempolicy.c        |  6 +++---
 mm/page_alloc.c       |  4 ++--
 6 files changed, 34 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/Makefile b/Makefile
index 1fa7e5343464..f1d121f23025 100644
--- a/Makefile
+++ b/Makefile
@@ -334,7 +334,7 @@ KALLSYMS	= scripts/kallsyms
 PERL		= perl
 CHECK		= sparse
 
-CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ $(CF)
+CHECKFLAGS     := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF)
 MODFLAGS	= -DMODULE
 CFLAGS_MODULE   = $(MODFLAGS)
 AFLAGS_MODULE   = $(MODFLAGS)
diff --git a/fs/buffer.c b/fs/buffer.c
index 1216c0d3c8ce..9657696fd6d7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -502,7 +502,7 @@ static void free_more_memory(void)
 	yield();
 
 	for_each_pgdat(pgdat) {
-		zones = pgdat->node_zonelists[GFP_NOFS&GFP_ZONEMASK].zones;
+		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
 			try_to_free_pages(zones, GFP_NOFS);
 	}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 3010e172394d..c3779432a723 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -12,8 +12,8 @@ struct vm_area_struct;
  * GFP bitmasks..
  */
 /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA	0x01u
-#define __GFP_HIGHMEM	0x02u
+#define __GFP_DMA	((__force gfp_t)0x01u)
+#define __GFP_HIGHMEM	((__force gfp_t)0x02u)
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -26,24 +26,24 @@ struct vm_area_struct;
  *
  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
  */
-#define __GFP_WAIT	0x10u	/* Can wait and reschedule? */
-#define __GFP_HIGH	0x20u	/* Should access emergency pools? */
-#define __GFP_IO	0x40u	/* Can start physical IO? */
-#define __GFP_FS	0x80u	/* Can call down to low-level FS? */
-#define __GFP_COLD	0x100u	/* Cache-cold page required */
-#define __GFP_NOWARN	0x200u	/* Suppress page allocation failure warning */
-#define __GFP_REPEAT	0x400u	/* Retry the allocation.  Might fail */
-#define __GFP_NOFAIL	0x800u	/* Retry for ever.  Cannot fail */
-#define __GFP_NORETRY	0x1000u	/* Do not retry.  Might fail */
-#define __GFP_NO_GROW	0x2000u	/* Slab internal usage */
-#define __GFP_COMP	0x4000u	/* Add compound page metadata */
-#define __GFP_ZERO	0x8000u	/* Return zeroed page on success */
-#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_NORECLAIM  0x20000u /* No realy zone reclaim during allocation */
-#define __GFP_HARDWALL   0x40000u /* Enforce hardwall cpuset memory allocs */
+#define __GFP_WAIT	((__force gfp_t)0x10u)	/* Can wait and reschedule? */
+#define __GFP_HIGH	((__force gfp_t)0x20u)	/* Should access emergency pools? */
+#define __GFP_IO	((__force gfp_t)0x40u)	/* Can start physical IO? */
+#define __GFP_FS	((__force gfp_t)0x80u)	/* Can call down to low-level FS? */
+#define __GFP_COLD	((__force gfp_t)0x100u)	/* Cache-cold page required */
+#define __GFP_NOWARN	((__force gfp_t)0x200u)	/* Suppress page allocation failure warning */
+#define __GFP_REPEAT	((__force gfp_t)0x400u)	/* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL	((__force gfp_t)0x800u)	/* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY	((__force gfp_t)0x1000u)/* Do not retry.  Might fail */
+#define __GFP_NO_GROW	((__force gfp_t)0x2000u)/* Slab internal usage */
+#define __GFP_COMP	((__force gfp_t)0x4000u)/* Add compound page metadata */
+#define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
+#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+#define __GFP_NORECLAIM  ((__force gfp_t)0x20000u) /* No realy zone reclaim during allocation */
+#define __GFP_HARDWALL   ((__force gfp_t)0x40000u) /* Enforce hardwall cpuset memory allocs */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
-#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /* if you forget to add the bitmask here kernel will crash, period */
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
@@ -64,6 +64,7 @@ struct vm_area_struct;
 
 #define GFP_DMA		__GFP_DMA
 
+#define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK))
 
 /*
  * There is only one page-allocator function, and two main namespaces to
@@ -94,7 +95,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 		return NULL;
 
 	return __alloc_pages(gfp_mask, order,
-		NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
 #ifdef CONFIG_NUMA
diff --git a/include/linux/types.h b/include/linux/types.h
index 0aee34f9da9f..21b9ce803644 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -151,7 +151,12 @@ typedef unsigned long sector_t;
  */
 
 #ifdef __CHECKER__
-#define __bitwise __attribute__((bitwise))
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#ifdef __CHECK_ENDIAN__
+#define __bitwise __bitwise__
 #else
 #define __bitwise
 #endif
@@ -166,7 +171,7 @@ typedef __u64 __bitwise __be64;
 #endif
 
 #ifdef __KERNEL__
-typedef unsigned __nocast gfp_t;
+typedef unsigned __bitwise__ gfp_t;
 #endif
 
 struct ustat {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 37af443eb094..1d5c64df1653 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -700,7 +700,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
 	case MPOL_BIND:
 		/* Lower zones don't get a policy applied */
 		/* Careful: current->mems_allowed might have moved */
-		if ((gfp & GFP_ZONEMASK) >= policy_zone)
+		if (gfp_zone(gfp) >= policy_zone)
 			if (cpuset_zonelist_valid_mems_allowed(policy->v.zonelist))
 				return policy->v.zonelist;
 		/*FALL THROUGH*/
@@ -712,7 +712,7 @@ static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
 		nd = 0;
 		BUG();
 	}
-	return NODE_DATA(nd)->node_zonelists + (gfp & GFP_ZONEMASK);
+	return NODE_DATA(nd)->node_zonelists + gfp_zone(gfp);
 }
 
 /* Do dynamic interleaving for a process */
@@ -757,7 +757,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, unsigned ni
 	struct page *page;
 
 	BUG_ON(!node_online(nid));
-	zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
+	zl = NODE_DATA(nid)->node_zonelists + gfp_zone(gfp);
 	page = __alloc_pages(gfp, order, zl);
 	if (page && page_zone(page) == zl->zones[0]) {
 		zone_pcp(zl->zones[0],get_cpu())->interleave_hit++;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e1d3d77f4aee..aa43ae3ab8c9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1089,7 +1089,7 @@ static unsigned int nr_free_zone_pages(int offset)
  */
 unsigned int nr_free_buffer_pages(void)
 {
-	return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK);
+	return nr_free_zone_pages(gfp_zone(GFP_USER));
 }
 
 /*
@@ -1097,7 +1097,7 @@ unsigned int nr_free_buffer_pages(void)
  */
 unsigned int nr_free_pagecache_pages(void)
 {
-	return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+	return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
 }
 
 #ifdef CONFIG_HIGHMEM
-- 
cgit v1.2.3-58-ga151


From 27496a8c67bef4d789d8e3c8317ca35813a507ae Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Oct 2005 03:20:48 -0400
Subject: [PATCH] gfp_t: fs/*

 - ->releasepage() annotated (s/int/gfp_t), instances updated
 - missing gfp_t in fs/* added
 - fixed misannotation from the original sweep caught by bitwise checks:
   XFS used __nocast both for gfp_t and for flags used by XFS allocator.
   The latter left with unsigned int __nocast; we might want to add a
   different type for those but for now let's leave them alone.  That,
   BTW, is a case when __nocast use had been actively confusing - it had
   been used in the same code for two different and similar types, with
   no way to catch misuses.  Switch of gfp_t to bitwise had caught that
   immediately...

One tricky bit is left alone to be dealt with later - mapping->flags is
a mix of gfp_t and error indications.  Left alone for now.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/afs/file.c               |  4 ++--
 fs/bio.c                    |  4 ++--
 fs/buffer.c                 |  2 +-
 fs/dcache.c                 |  2 +-
 fs/dquot.c                  |  2 +-
 fs/ext3/inode.c             |  2 +-
 fs/hfs/inode.c              |  2 +-
 fs/hfsplus/inode.c          |  2 +-
 fs/inode.c                  |  2 +-
 fs/jbd/journal.c            |  2 +-
 fs/jbd/transaction.c        |  2 +-
 fs/jfs/jfs_metapage.c       |  4 ++--
 fs/mbcache.c                |  6 +++---
 fs/reiserfs/fix_node.c      |  2 +-
 fs/reiserfs/inode.c         |  2 +-
 fs/xfs/linux-2.6/kmem.c     | 22 +++++++++++-----------
 fs/xfs/linux-2.6/kmem.h     | 18 +++++++++---------
 fs/xfs/linux-2.6/xfs_aops.c |  2 +-
 fs/xfs/linux-2.6/xfs_buf.c  |  8 ++++----
 include/linux/bio.h         |  2 +-
 include/linux/buffer_head.h |  2 +-
 include/linux/fs.h          |  2 +-
 include/linux/jbd.h         |  4 ++--
 include/linux/mbcache.h     |  2 +-
 include/linux/reiserfs_fs.h |  2 +-
 25 files changed, 52 insertions(+), 52 deletions(-)

(limited to 'fs')

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 23c125128024..0d576987ec67 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -29,7 +29,7 @@ static int afs_file_release(struct inode *inode, struct file *file);
 
 static int afs_file_readpage(struct file *file, struct page *page);
 static int afs_file_invalidatepage(struct page *page, unsigned long offset);
-static int afs_file_releasepage(struct page *page, int gfp_flags);
+static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
 
 static ssize_t afs_file_write(struct file *file, const char __user *buf,
 			      size_t size, loff_t *off);
@@ -279,7 +279,7 @@ static int afs_file_invalidatepage(struct page *page, unsigned long offset)
 /*
  * release a page and cleanup its private data
  */
-static int afs_file_releasepage(struct page *page, int gfp_flags)
+static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	struct cachefs_page *pageio;
 
diff --git a/fs/bio.c b/fs/bio.c
index 7d81a93afd48..460554b07ff9 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -778,7 +778,7 @@ static int bio_map_kern_endio(struct bio *bio, unsigned int bytes_done, int err)
 
 
 static struct bio *__bio_map_kern(request_queue_t *q, void *data,
-				  unsigned int len, unsigned int gfp_mask)
+				  unsigned int len, gfp_t gfp_mask)
 {
 	unsigned long kaddr = (unsigned long)data;
 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -825,7 +825,7 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data,
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len,
-			 unsigned int gfp_mask)
+			 gfp_t gfp_mask)
 {
 	struct bio *bio;
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 9657696fd6d7..b1667986442f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1571,7 +1571,7 @@ static inline void discard_buffer(struct buffer_head * bh)
  *
  * NOTE: @gfp_mask may go away, and this function may become non-blocking.
  */
-int try_to_release_page(struct page *page, int gfp_mask)
+int try_to_release_page(struct page *page, gfp_t gfp_mask)
 {
 	struct address_space * const mapping = page->mapping;
 
diff --git a/fs/dcache.c b/fs/dcache.c
index fb10386c59be..e90512ed35a4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -689,7 +689,7 @@ void shrink_dcache_anon(struct hlist_head *head)
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
diff --git a/fs/dquot.c b/fs/dquot.c
index b9732335bcdc..05f3327d64a3 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -500,7 +500,7 @@ static void prune_dqcache(int count)
  * more memory
  */
 
-static int shrink_dqcache_memory(int nr, unsigned int gfp_mask)
+static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		spin_lock(&dq_list_lock);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index b5177c90d6f1..8b38f2232796 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1434,7 +1434,7 @@ static int ext3_invalidatepage(struct page *page, unsigned long offset)
 	return journal_invalidatepage(journal, page, offset);
 }
 
-static int ext3_releasepage(struct page *page, int wait)
+static int ext3_releasepage(struct page *page, gfp_t wait)
 {
 	journal_t *journal = EXT3_JOURNAL(page->mapping->host);
 
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index f1570b9f9de3..3f680c5675bf 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -46,7 +46,7 @@ static sector_t hfs_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, hfs_get_block);
 }
 
-static int hfs_releasepage(struct page *page, int mask)
+static int hfs_releasepage(struct page *page, gfp_t mask)
 {
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index d5642705f633..f205773ddfbe 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -40,7 +40,7 @@ static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, hfsplus_get_block);
 }
 
-static int hfsplus_releasepage(struct page *page, int mask)
+static int hfsplus_releasepage(struct page *page, gfp_t mask)
 {
 	struct inode *inode = page->mapping->host;
 	struct super_block *sb = inode->i_sb;
diff --git a/fs/inode.c b/fs/inode.c
index f80a79ff156b..7d3316527767 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -475,7 +475,7 @@ static void prune_icache(int nr_to_scan)
  * This function is passed the number of inodes to scan, and it returns the
  * total number of remaining possibly-reclaimable inodes.
  */
-static int shrink_icache_memory(int nr, unsigned int gfp_mask)
+static int shrink_icache_memory(int nr, gfp_t gfp_mask)
 {
 	if (nr) {
 		/*
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 7ae2c4fe506b..e4b516ac4989 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1606,7 +1606,7 @@ int journal_blocks_per_page(struct inode *inode)
  * Simple support for retrying memory allocations.  Introduced to help to
  * debug different VM deadlock avoidance strategies. 
  */
-void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry)
+void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 {
 	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
 }
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 49bbc2be3d72..13cb05bf6048 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1621,7 +1621,7 @@ out:
  * while the data is part of a transaction.  Yes?
  */
 int journal_try_to_free_buffers(journal_t *journal, 
-				struct page *page, int unused_gfp_mask)
+				struct page *page, gfp_t unused_gfp_mask)
 {
 	struct buffer_head *head;
 	struct buffer_head *bh;
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 13d7e3f1feb4..eeb37d70e650 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -198,7 +198,7 @@ static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 	}
 }
 
-static inline struct metapage *alloc_metapage(unsigned int gfp_mask)
+static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
 {
 	return mempool_alloc(metapage_mempool, gfp_mask);
 }
@@ -534,7 +534,7 @@ add_failed:
 	return -EIO;
 }
 
-static int metapage_releasepage(struct page *page, int gfp_mask)
+static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
 {
 	struct metapage *mp;
 	int busy = 0;
diff --git a/fs/mbcache.c b/fs/mbcache.c
index b002a088857d..298997f17475 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -116,7 +116,7 @@ mb_cache_indexes(struct mb_cache *cache)
  * What the mbcache registers as to get shrunk dynamically.
  */
 
-static int mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask);
+static int mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask);
 
 
 static inline int
@@ -140,7 +140,7 @@ __mb_cache_entry_unhash(struct mb_cache_entry *ce)
 
 
 static inline void
-__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask)
+__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
 {
 	struct mb_cache *cache = ce->e_cache;
 
@@ -193,7 +193,7 @@ forget:
  * Returns the number of objects which are present in the cache.
  */
 static int
-mb_cache_shrink_fn(int nr_to_scan, unsigned int gfp_mask)
+mb_cache_shrink_fn(int nr_to_scan, gfp_t gfp_mask)
 {
 	LIST_HEAD(free_list);
 	struct list_head *l, *ltmp;
diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c
index 2706e2adffab..45829889dcdc 100644
--- a/fs/reiserfs/fix_node.c
+++ b/fs/reiserfs/fix_node.c
@@ -2022,7 +2022,7 @@ static int get_neighbors(struct tree_balance *p_s_tb, int n_h)
 }
 
 #ifdef CONFIG_REISERFS_CHECK
-void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s)
+void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s)
 {
 	void *vp;
 	static size_t malloced;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d76ee6c4f9b8..5f82352b97e1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2842,7 +2842,7 @@ static int reiserfs_set_page_dirty(struct page *page)
  * even in -o notail mode, we can't be sure an old mount without -o notail
  * didn't create files with tails.
  */
-static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
+static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 {
 	struct inode *inode = page->mapping->host;
 	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb);
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index d2653b589b1c..3c92162dc728 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -45,11 +45,11 @@
 
 
 void *
-kmem_alloc(size_t size, gfp_t flags)
+kmem_alloc(size_t size, unsigned int __nocast flags)
 {
-	int		retries = 0;
-	unsigned int	lflags = kmem_flags_convert(flags);
-	void		*ptr;
+	int	retries = 0;
+	gfp_t	lflags = kmem_flags_convert(flags);
+	void	*ptr;
 
 	do {
 		if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, gfp_t flags)
 }
 
 void *
-kmem_zalloc(size_t size, gfp_t flags)
+kmem_zalloc(size_t size, unsigned int __nocast flags)
 {
 	void	*ptr;
 
@@ -90,7 +90,7 @@ kmem_free(void *ptr, size_t size)
 
 void *
 kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
-	     gfp_t flags)
+	     unsigned int __nocast flags)
 {
 	void	*new;
 
@@ -105,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
 }
 
 void *
-kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags)
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
-	int		retries = 0;
-	unsigned int	lflags = kmem_flags_convert(flags);
-	void		*ptr;
+	int	retries = 0;
+	gfp_t	lflags = kmem_flags_convert(flags);
+	void	*ptr;
 
 	do {
 		ptr = kmem_cache_alloc(zone, lflags);
@@ -124,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, gfp_t flags)
 }
 
 void *
-kmem_zone_zalloc(kmem_zone_t *zone, gfp_t flags)
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
 	void	*ptr;
 
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index ee7010f085bc..f4bb78c268c0 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t;
 	*(NSTATEP) = *(OSTATEP);	\
 } while (0)
 
-static __inline unsigned int kmem_flags_convert(gfp_t flags)
+static __inline gfp_t kmem_flags_convert(unsigned int __nocast flags)
 {
-	unsigned int	lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
+	gfp_t lflags = __GFP_NOWARN;	/* we'll report problems, if need be */
 
 #ifdef DEBUG
 	if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -125,16 +125,16 @@ kmem_zone_destroy(kmem_zone_t *zone)
 		BUG();
 }
 
-extern void	    *kmem_zone_zalloc(kmem_zone_t *, gfp_t);
-extern void	    *kmem_zone_alloc(kmem_zone_t *, gfp_t);
+extern void	    *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+extern void	    *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 
-extern void	    *kmem_alloc(size_t, gfp_t);
-extern void	    *kmem_realloc(void *, size_t, size_t, gfp_t);
-extern void	    *kmem_zalloc(size_t, gfp_t);
+extern void	    *kmem_alloc(size_t, unsigned int __nocast);
+extern void	    *kmem_realloc(void *, size_t, size_t, unsigned int __nocast);
+extern void	    *kmem_zalloc(size_t, unsigned int __nocast);
 extern void         kmem_free(void *, size_t);
 
 typedef struct shrinker *kmem_shaker_t;
-typedef int (*kmem_shake_func_t)(int, unsigned int);
+typedef int (*kmem_shake_func_t)(int, gfp_t);
 
 static __inline kmem_shaker_t
 kmem_shake_register(kmem_shake_func_t sfunc)
@@ -149,7 +149,7 @@ kmem_shake_deregister(kmem_shaker_t shrinker)
 }
 
 static __inline int
-kmem_shake_allow(unsigned int gfp_mask)
+kmem_shake_allow(gfp_t gfp_mask)
 {
 	return (gfp_mask & __GFP_WAIT);
 }
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c6c077978fe3..7aa398724706 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1296,7 +1296,7 @@ linvfs_invalidate_page(
 STATIC int
 linvfs_release_page(
 	struct page		*page,
-	int			gfp_mask)
+	gfp_t			gfp_mask)
 {
 	struct inode		*inode = page->mapping->host;
 	int			dirty, delalloc, unmapped, unwritten;
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index e82cf72ac599..ba4767c04adf 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -64,7 +64,7 @@
 
 STATIC kmem_cache_t *pagebuf_zone;
 STATIC kmem_shaker_t pagebuf_shake;
-STATIC int xfsbufd_wakeup(int, unsigned int);
+STATIC int xfsbufd_wakeup(int, gfp_t);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
 
 STATIC struct workqueue_struct *xfslogd_workqueue;
@@ -383,7 +383,7 @@ _pagebuf_lookup_pages(
 	size_t			blocksize = bp->pb_target->pbr_bsize;
 	size_t			size = bp->pb_count_desired;
 	size_t			nbytes, offset;
-	int			gfp_mask = pb_to_gfp(flags);
+	gfp_t			gfp_mask = pb_to_gfp(flags);
 	unsigned short		page_count, i;
 	pgoff_t			first;
 	loff_t			end;
@@ -1749,8 +1749,8 @@ STATIC int xfsbufd_force_sleep;
 
 STATIC int
 xfsbufd_wakeup(
-	int			priority,
-	unsigned int		mask)
+	int		priority,
+	gfp_t		mask)
 {
 	if (xfsbufd_force_sleep)
 		return 0;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3344b4e8e43a..685fd3720df5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -301,7 +301,7 @@ extern struct bio *bio_map_user_iov(struct request_queue *,
 				    struct sg_iovec *, int, int);
 extern void bio_unmap_user(struct bio *);
 extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
-				unsigned int);
+				gfp_t);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6a1d154c0825..88af42f5e04a 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -188,7 +188,7 @@ extern int buffer_heads_over_limit;
  * Generic address_space_operations implementations for buffer_head-backed
  * address_spaces.
  */
-int try_to_release_page(struct page * page, int gfp_mask);
+int try_to_release_page(struct page * page, gfp_t gfp_mask);
 int block_invalidatepage(struct page *page, unsigned long offset);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0b77c5af9a0..f83d997c5582 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -320,7 +320,7 @@ struct address_space_operations {
 	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
 	sector_t (*bmap)(struct address_space *, sector_t);
 	int (*invalidatepage) (struct page *, unsigned long);
-	int (*releasepage) (struct page *, int);
+	int (*releasepage) (struct page *, gfp_t);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index ff853b3173c6..be197eb90077 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -69,7 +69,7 @@ extern int journal_enable_debug;
 #define jbd_debug(f, a...)	/**/
 #endif
 
-extern void * __jbd_kmalloc (const char *where, size_t size, int flags, int retry);
+extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
 #define jbd_kmalloc(size, flags) \
 	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
 #define jbd_rep_kmalloc(size, flags) \
@@ -890,7 +890,7 @@ extern int	 journal_forget (handle_t *, struct buffer_head *);
 extern void	 journal_sync_buffer (struct buffer_head *);
 extern int	 journal_invalidatepage(journal_t *,
 				struct page *, unsigned long);
-extern int	 journal_try_to_free_buffers(journal_t *, struct page *, int);
+extern int	 journal_try_to_free_buffers(journal_t *, struct page *, gfp_t);
 extern int	 journal_stop(handle_t *);
 extern int	 journal_flush (journal_t *);
 extern void	 journal_lock_updates (journal_t *);
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 9263d2db2d67..99e044b4efc6 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -22,7 +22,7 @@ struct mb_cache_entry {
 };
 
 struct mb_cache_op {
-	int (*free)(struct mb_cache_entry *, int);
+	int (*free)(struct mb_cache_entry *, gfp_t);
 };
 
 /* Functions on caches */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index af00b10294cd..001ab82df051 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1972,7 +1972,7 @@ extern struct address_space_operations reiserfs_address_space_operations;
 
 /* fix_nodes.c */
 #ifdef CONFIG_REISERFS_CHECK
-void *reiserfs_kmalloc(size_t size, int flags, struct super_block *s);
+void *reiserfs_kmalloc(size_t size, gfp_t flags, struct super_block *s);
 void reiserfs_kfree(const void *vp, size_t size, struct super_block *s);
 #else
 static inline void *reiserfs_kmalloc(size_t size, int flags,
-- 
cgit v1.2.3-58-ga151


From c4cdd038318863e912e9b992489f61497f98b442 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Oct 2005 03:22:39 -0400
Subject: [PATCH] gfp_t: reiserfs mapping_set_gfp_mask() use

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 87ac9dc8b381..72e120798677 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -453,7 +453,7 @@ static struct page *reiserfs_get_page(struct inode *dir, unsigned long n)
 	struct page *page;
 	/* We can deadlock if we try to free dentries,
 	   and an unlink/rmdir has just occured - GFP_NOFS avoids this */
-	mapping->flags = (mapping->flags & ~__GFP_BITS_MASK) | GFP_NOFS;
+	mapping_set_gfp_mask(mapping, GFP_NOFS);
 	page = read_cache_page(mapping, n,
 			       (filler_t *) mapping->a_ops->readpage, NULL);
 	if (!IS_ERR(page)) {
-- 
cgit v1.2.3-58-ga151