From 62bac4af3d778f6d06d351c0442008967c512588 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 12:50:15 -0400 Subject: svcrpc: don't set then immediately clear XPT_DEFERRED There's no harm to doing this, since the only caller will immediately call svc_enqueue() afterwards, ensuring we don't miss the remaining deferred requests just because XPT_DEFERRED was briefly cleared. But why not just do this the simple way? Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c82fe739fbdc..a74cb67f15bf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1059,14 +1059,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) return NULL; spin_lock(&xprt->xpt_lock); - clear_bit(XPT_DEFERRED, &xprt->xpt_flags); if (!list_empty(&xprt->xpt_deferred)) { dr = list_entry(xprt->xpt_deferred.next, struct svc_deferred_req, handle.recent); list_del_init(&dr->handle.recent); - set_bit(XPT_DEFERRED, &xprt->xpt_flags); - } + } else + clear_bit(XPT_DEFERRED, &xprt->xpt_flags); spin_unlock(&xprt->xpt_lock); return dr; } -- cgit v1.2.3-58-ga151 From ca7896cd83456082b1e78816cdf7e41658ef7bcd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 14:12:40 -0400 Subject: nfsd4: centralize more calls to svc_xprt_received Follow up on b48fa6b99100dc7772af3cd276035fcec9719ceb by moving all the svc_xprt_received() calls for the main xprt to one place. The clearing of XPT_BUSY here is critical to the correctness of the server, so I'd prefer it to be obvious where we do it. The only substantive result is moving svc_xprt_received() after svc_receive_deferred(). Other than a (likely insignificant) delay waking up the next thread, that should be harmless. Also reshuffle the exit code a little to skip a few other steps that we don't care about the in the svc_delete_xprt() case. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a74cb67f15bf..dd61cd02461f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -716,7 +716,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); - } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + /* Leave XPT_BUSY set on the dead xprt: */ + goto out; + } + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; newxpt = xprt->xpt_ops->xpo_accept(xprt); if (newxpt) { @@ -741,28 +744,23 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&serv->sv_lock); svc_xprt_received(newxpt); } - svc_xprt_received(xprt); } else { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); - if (rqstp->rq_deferred) { - svc_xprt_received(xprt); + if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); - } else { + else len = xprt->xpt_ops->xpo_recvfrom(rqstp); - svc_xprt_received(xprt); - } dprintk("svc: got len=%d\n", len); } + svc_xprt_received(xprt); /* No data, incomplete (TCP) read, or accept() */ - if (len == 0 || len == -EAGAIN) { - rqstp->rq_res.len = 0; - svc_xprt_release(rqstp); - return -EAGAIN; - } + if (len == 0 || len == -EAGAIN) + goto out; + clear_bit(XPT_OLD, &xprt->xpt_flags); rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); @@ -771,6 +769,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (serv->sv_stats) serv->sv_stats->netcnt++; return len; +out: + rqstp->rq_res.len = 0; + svc_xprt_release(rqstp); + return -EAGAIN; } EXPORT_SYMBOL_GPL(svc_recv); -- cgit v1.2.3-58-ga151 From f8c0d226fef05226ff1a85055c8ed663022f40c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 18:11:21 -0400 Subject: svcrpc: simplify svc_close_all There's no need to be fooling with XPT_BUSY now that all the threads are gone. The list_del_init() here could execute at the same time as the svc_xprt_enqueue()'s list_add_tail(), with undefined results. We don't really care at this point, but it might result in a spurious list-corruption warning or something. And svc_close() isn't adding any value; just call svc_delete_xprt() directly. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dd61cd02461f..8c018df80692 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -941,16 +941,16 @@ void svc_close_all(struct list_head *xprt_list) struct svc_xprt *xprt; struct svc_xprt *tmp; + /* + * The server is shutting down, and no more threads are running. + * svc_xprt_enqueue() might still be running, but at worst it + * will re-add the xprt to sp_sockets, which will soon get + * freed. So we don't bother with any more locking, and don't + * leave the close to the (nonexistent) server threads: + */ list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { set_bit(XPT_CLOSE, &xprt->xpt_flags); - if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { - /* Waiting to be processed, but no threads left, - * So just remove it from the waiting list - */ - list_del_init(&xprt->xpt_ready); - clear_bit(XPT_BUSY, &xprt->xpt_flags); - } - svc_close_xprt(xprt); + svc_delete_xprt(xprt); } } -- cgit v1.2.3-58-ga151 From b176331627fccc726d28f4fc4a357d1f3c19dbf0 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 25 Oct 2010 20:24:48 -0400 Subject: svcrpc: svc_close_xprt comment Neil Brown had to explain to me why we do this here; record the answer for posterity. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8c018df80692..bfbda676574a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -931,7 +931,12 @@ void svc_close_xprt(struct svc_xprt *xprt) if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) /* someone else will have to effect the close */ return; - + /* + * We expect svc_close_xprt() to work even when no threads are + * running (e.g., while configuring the server before starting + * any threads), so if the transport isn't busy, we delete + * it ourself: + */ svc_delete_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_close_xprt); -- cgit v1.2.3-58-ga151 From 9c335c0b8daf56b9f73479d00b1dd726e1fcca09 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 26 Oct 2010 11:32:03 -0400 Subject: svcrpc: fix wspace-checking race We call svc_xprt_enqueue() after something happens which we think may require handling from a server thread. To avoid such events being lost, svc_xprt_enqueue() must guarantee that there will be a svc_serv() call from a server thread following any such event. It does that by either waking up a server thread itself, or checking that XPT_BUSY is set (in which case somebody else is doing it). But the check of XPT_BUSY could occur just as someone finishes processing some other event, and just before they clear XPT_BUSY. Therefore it's important not to clear XPT_BUSY without subsequently doing another svc_export_enqueue() to check whether the xprt should be requeued. The xpo_wspace() check in svc_xprt_enqueue() breaks this rule, allowing an event to be missed in situations like: data arrives call svc_tcp_data_ready(): call svc_xprt_enqueue(): set BUSY find no write space svc_reserve(): free up write space call svc_enqueue(): test BUSY clear BUSY So, instead, check wspace in the same places that the state flags are checked: before taking BUSY, and in svc_receive(). Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index bfbda676574a..b6f47da170b3 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -302,6 +302,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) list_del(&rqstp->rq_list); } +static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) +{ + if (xprt->xpt_flags & ((1<xpt_flags & ((1<xpt_ops->xpo_has_wspace(xprt); + return false; +} + /* * Queue up a transport with data pending. If there are idle nfsd * processes, wake 'em up. @@ -314,8 +323,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) struct svc_rqst *rqstp; int cpu; - if (!(xprt->xpt_flags & - ((1<xpt_pool != NULL); xprt->xpt_pool = pool; - /* Handle pending connection */ - if (test_bit(XPT_CONN, &xprt->xpt_flags)) - goto process; - - /* Handle close in-progress */ - if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) - goto process; - - /* Check if we have space to reply to a request */ - if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { - /* Don't enqueue while not enough space for reply */ - dprintk("svc: no write space, transport %p not enqueued\n", - xprt); - xprt->xpt_pool = NULL; - clear_bit(XPT_BUSY, &xprt->xpt_flags); - goto out_unlock; - } - - process: if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, struct svc_rqst, @@ -744,7 +733,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) spin_unlock_bh(&serv->sv_lock); svc_xprt_received(newxpt); } - } else { + } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", rqstp, pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); -- cgit v1.2.3-58-ga151 From 7c96aef75949a56ec427fc6a2522dace2af33605 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Nov 2010 11:27:01 +1100 Subject: sunrpc: remove xpt_pool The xpt_pool field is only used for reporting BUGs. And it isn't used correctly. In particular, when it is cleared in svc_xprt_received before XPT_BUSY is cleared, there is no guarantee that either the compiler or the CPU might not re-order to two assignments, just setting xpt_pool to NULL after XPT_BUSY is cleared. If a different cpu were running svc_xprt_enqueue at this moment, it might see XPT_BUSY clear and then xpt_pool non-NULL, and so BUG. This could be fixed by calling smp_mb__before_clear_bit() before the clear_bit. However as xpt_pool isn't really used, it seems safest to simply remove xpt_pool. Another alternate would be to change the clear_bit to clear_bit_unlock, and the test_and_set_bit to test_and_set_bit_lock. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 - net/sunrpc/svc_xprt.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index aea0d438e3c7..c8f81da15c7e 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -63,7 +63,6 @@ struct svc_xprt { #define XPT_LISTENER 11 /* listening endpoint */ #define XPT_CACHE_AUTH 12 /* cache auth info */ - struct svc_pool *xpt_pool; /* current pool iff queued */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ struct mutex xpt_mutex; /* to serialize sending data */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5a75d23645c8..5eae53b1e306 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -351,8 +351,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) dprintk("svc: transport %p busy, not enqueued\n", xprt); goto out_unlock; } - BUG_ON(xprt->xpt_pool != NULL); - xprt->xpt_pool = pool; if (!list_empty(&pool->sp_threads)) { rqstp = list_entry(pool->sp_threads.next, @@ -370,13 +368,11 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp->rq_reserved = serv->sv_max_mesg; atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; - BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); pool->sp_stats.sockets_queued++; - BUG_ON(xprt->xpt_pool != pool); } out_unlock: @@ -415,7 +411,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) void svc_xprt_received(struct svc_xprt *xprt) { BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); - xprt->xpt_pool = NULL; /* As soon as we clear busy, the xprt could be closed and * 'put', so we need a reference to call svc_xprt_enqueue with: */ -- cgit v1.2.3-58-ga151 From 3942302ea9e1dffa933021b20bf1642046e7641b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 15 Nov 2010 11:27:01 +1100 Subject: sunrpc: svc_sock_names should hold ref to socket being closed. Currently svc_sock_names calls svc_close_xprt on a svc_sock to which it does not own a reference. As soon as svc_close_xprt sets XPT_CLOSE, the socket could be freed by a separate thread (though this is a very unlikely race). It is safer to hold a reference while calling svc_close_xprt. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 07919e16be3e..52bd1130e83a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -324,19 +324,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, len = onelen; break; } - if (toclose && strcmp(toclose, buf + len) == 0) + if (toclose && strcmp(toclose, buf + len) == 0) { closesk = svsk; - else + svc_xprt_get(&closesk->sk_xprt); + } else len += onelen; } spin_unlock_bh(&serv->sv_lock); - if (closesk) + if (closesk) { /* Should unregister with portmap, but you cannot * unregister just one protocol... */ svc_close_xprt(&closesk->sk_xprt); - else if (toclose) + svc_xprt_put(&closesk->sk_xprt); + } else if (toclose) return -ENOENT; return len; } -- cgit v1.2.3-58-ga151 From 66c941f4aa8aef397a757001af61073db23b39e5 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 15 Dec 2010 14:47:20 +0800 Subject: net: sunrpc: kill unused macros These macros never be used for several years. Signed-off-by: Shan Wei Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/svcauth_gss.c | 2 -- net/sunrpc/svcauth.c | 1 - net/sunrpc/svcauth_unix.c | 2 -- 3 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index dec2a6fc7c12..bcdae78fdfc6 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -67,7 +67,6 @@ static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) #define RSI_HASHBITS 6 #define RSI_HASHMAX (1< Date: Sun, 14 Nov 2010 18:08:11 -0800 Subject: net/sunrpc/auth_gss/gss_krb5_crypto.c: Use normal negative error value return And remove unnecessary double semicolon too. No effect to code, as test is != 0. Signed-off-by: Joe Perches Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/gss_krb5_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 75ee993ea057..9576f35ab701 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -137,7 +137,7 @@ arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4]) ms_usage = 13; break; default: - return EINVAL;; + return -EINVAL; } salt[0] = (ms_usage >> 0) & 0xff; salt[1] = (ms_usage >> 8) & 0xff; -- cgit v1.2.3-58-ga151 From 31f7aa65f536995c6d933c57230919ae408952a5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 24 Dec 2010 14:03:38 -0500 Subject: svcrpc: modifying valid sunrpc cache entries is racy Once a sunrpc cache entry is VALID, we should be replacing it (and allowing any concurrent users to destroy it on last put) instead of trying to update it in place. Otherwise someone referencing the ip_map we're modifying here could try to use the m_client just as we're putting the last reference. The bug should only be seen by users of the legacy nfsd interfaces. (Thanks to Neil for suggestion to use sunrpc_invalidate.) Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/svcauth_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a04ac9193d59..59a7c524a8b1 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -401,8 +401,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) return NULL; if ((ipm->m_client->addr_changes - ipm->m_add_change) >0) { - if (test_and_set_bit(CACHE_NEGATIVE, &ipm->h.flags) == 0) - auth_domain_put(&ipm->m_client->h); + sunrpc_invalidate(&ipm->h, sn->ip_map_cache); rv = NULL; } else { rv = &ipm->m_client->h; -- cgit v1.2.3-58-ga151 From bdd5f05d91e8ae68075b812ce244c918d3d752cd Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 13:31:45 -0500 Subject: SUNRPC: Remove more code when NFSD_DEPRECATED is not configured Signed-off-by: NeilBrown [bfields@redhat.com: moved svcauth_unix_purge outside ifdef's.] Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 3 +++ net/sunrpc/svcauth_unix.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'net') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6950c981882d..c2aebe8eeffd 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -255,10 +255,13 @@ static inline time_t get_expiry(char **bpp) return rv - boot.tv_sec; } +#ifdef CONFIG_NFSD_DEPRECATED static inline void sunrpc_invalidate(struct cache_head *h, struct cache_detail *detail) { h->expiry_time = seconds_since_boot() - 1; detail->nextcheck = seconds_since_boot(); } +#endif /* CONFIG_NFSD_DEPRECATED */ + #endif /* _LINUX_SUNRPC_CACHE_H_ */ diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 59a7c524a8b1..30916b06c12b 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -30,7 +30,9 @@ struct unix_domain { struct auth_domain h; +#ifdef CONFIG_NFSD_DEPRECATED int addr_changes; +#endif /* CONFIG_NFSD_DEPRECATED */ /* other stuff later */ }; @@ -64,7 +66,9 @@ struct auth_domain *unix_domain_find(char *name) return NULL; } new->h.flavour = &svcauth_unix; +#ifdef CONFIG_NFSD_DEPRECATED new->addr_changes = 0; +#endif /* CONFIG_NFSD_DEPRECATED */ rv = auth_domain_lookup(name, &new->h); } } @@ -91,7 +95,9 @@ struct ip_map { char m_class[8]; /* e.g. "nfsd" */ struct in6_addr m_addr; struct unix_domain *m_client; +#ifdef CONFIG_NFSD_DEPRECATED int m_add_change; +#endif /* CONFIG_NFSD_DEPRECATED */ }; static void ip_map_put(struct kref *kref) @@ -145,7 +151,9 @@ static void update(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->m_client->h.ref); new->m_client = item->m_client; +#ifdef CONFIG_NFSD_DEPRECATED new->m_add_change = item->m_add_change; +#endif /* CONFIG_NFSD_DEPRECATED */ } static struct cache_head *ip_map_alloc(void) { @@ -330,6 +338,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, ip.h.flags = 0; if (!udom) set_bit(CACHE_NEGATIVE, &ip.h.flags); +#ifdef CONFIG_NFSD_DEPRECATED else { ip.m_add_change = udom->addr_changes; /* if this is from the legacy set_client system call, @@ -338,6 +347,7 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, if (expiry == NEVER) ip.m_add_change++; } +#endif /* CONFIG_NFSD_DEPRECATED */ ip.h.expiry_time = expiry; ch = sunrpc_cache_update(cd, &ip.h, &ipm->h, hash_str(ipm->m_class, IP_HASHBITS) ^ @@ -357,6 +367,7 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm, return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry); } +#ifdef CONFIG_NFSD_DEPRECATED int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom) { struct unix_domain *udom; @@ -411,6 +422,7 @@ struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr) return rv; } EXPORT_SYMBOL_GPL(auth_unix_lookup); +#endif /* CONFIG_NFSD_DEPRECATED */ void svcauth_unix_purge(void) { -- cgit v1.2.3-58-ga151 From d76d1815f3e72fb627ad7f95ef63120b0a557c9c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:28:34 -0500 Subject: svcrpc: avoid double reply caused by deferral race Commit d29068c431599fa "sunrpc: Simplify cache_defer_req and related functions." asserted that cache_check() could determine success or failure of cache_defer_req() by checking the CACHE_PENDING bit. This isn't quite right. We need to know whether cache_defer_req() created a deferred request, in which case sending an rpc reply has become the responsibility of the deferred request, and it is important that we not send our own reply, resulting in two different replies to the same request. And the CACHE_PENDING bit doesn't tell us that; we could have succesfully created a deferred request at the same time as another thread cleared the CACHE_PENDING bit. So, partially revert that commit, to ensure that cache_check() returns -EAGAIN if and only if a deferred request has been created. Signed-off-by: J. Bruce Fields Acked-by: NeilBrown --- net/sunrpc/cache.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e433e7580e27..0d6002f26d82 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -37,7 +37,7 @@ #define RPCDBG_FACILITY RPCDBG_CACHE -static void cache_defer_req(struct cache_req *req, struct cache_head *item); +static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h) @@ -268,9 +268,11 @@ int cache_check(struct cache_detail *detail, } if (rv == -EAGAIN) { - cache_defer_req(rqstp, h); - if (!test_bit(CACHE_PENDING, &h->flags)) { - /* Request is not deferred */ + if (!cache_defer_req(rqstp, h)) { + /* + * Request was not deferred; handle it as best + * we can ourselves: + */ rv = cache_is_valid(detail, h); if (rv == -EAGAIN) rv = -ETIMEDOUT; @@ -618,18 +620,19 @@ static void cache_limit_defers(void) discard->revisit(discard, 1); } -static void cache_defer_req(struct cache_req *req, struct cache_head *item) +/* Return true if and only if a deferred request is queued. */ +static bool cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; if (req->thread_wait) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) - return; + return false; } dreq = req->defer(req); if (dreq == NULL) - return; + return false; setup_deferral(dreq, item, 1); if (!test_bit(CACHE_PENDING, &item->flags)) /* Bit could have been cleared before we managed to @@ -638,6 +641,7 @@ static void cache_defer_req(struct cache_req *req, struct cache_head *item) cache_revisit_request(item); cache_limit_defers(); + return true; } static void cache_revisit_request(struct cache_head *item) -- cgit v1.2.3-58-ga151 From 9e701c610923aaeac8b38b9202a686d1cc9ee35d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 2 Jan 2011 21:56:36 -0500 Subject: svcrpc: simpler request dropping Currently we use -EAGAIN returns to determine when to drop a deferred request. On its own, that is error-prone, as it makes us treat -EAGAIN returns from other functions specially to prevent inadvertent dropping. So, use a flag on the request instead. Returning an error on request deferral is still required, to prevent further processing, but we no longer need worry that an error return on its own could result in a drop. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 2 +- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svc.c | 3 ++- net/sunrpc/svc_xprt.c | 1 + 4 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 2bae1d86f5f2..18743c4d8bca 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -608,7 +608,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Now call the procedure handler, and encode NFS status. */ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); - if (nfserr == nfserr_dropit) { + if (nfserr == nfserr_dropit || rqstp->rq_dropme) { dprintk("nfsd: Dropping request; may be revisited later\n"); nfsd_cache_update(rqstp, RC_NOCACHE, NULL); return 0; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 5a3085b9b394..d45c482b6444 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -269,6 +269,7 @@ struct svc_rqst { struct cache_req rq_chandle; /* handle passed to caches for * request delaying */ + bool rq_dropme; /* Catering to nfsd */ struct auth_domain * rq_client; /* RPC peer info */ struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6359c42c4941..df1931f8ae98 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1005,6 +1005,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ rqstp->rq_usedeferral = 1; + rqstp->rq_dropme = false; /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); @@ -1106,7 +1107,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); /* Encode reply */ - if (*statp == rpc_drop_reply) { + if (rqstp->rq_dropme) { if (procp->pc_release) procp->pc_release(rqstp, NULL, rqstp->rq_resp); goto dropit; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 5eae53b1e306..173f3b975d49 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1019,6 +1019,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) } svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; + rqstp->rq_dropme = true; dr->handle.revisit = svc_revisit; return &dr->handle; -- cgit v1.2.3-58-ga151 From 6bab93f87ec703bf6650875881b11f9f27d7da56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 3 Jan 2011 15:10:27 -0500 Subject: svcrpc: take lock on turning entry NEGATIVE in cache_check We attempt to turn a cache entry negative in place. But that entry may already have been filled in by some other task since we last checked whether it was valid, so we could be modifying an already-valid entry. If nothing else there's a likely leak in such a case when the entry is eventually put() and contents are not freed because it has CACHE_NEGATIVE set. So, take the cache_lock just as sunrpc_cache_update() does. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 0d6002f26d82..a6c57334fa13 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -213,6 +213,23 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head } } +static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h) +{ + int rv; + + write_lock(&detail->hash_lock); + rv = cache_is_valid(detail, h); + if (rv != -EAGAIN) { + write_unlock(&detail->hash_lock); + return rv; + } + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + write_unlock(&detail->hash_lock); + cache_fresh_unlocked(h, detail); + return -ENOENT; +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -251,14 +268,8 @@ int cache_check(struct cache_detail *detail, case -EINVAL: clear_bit(CACHE_PENDING, &h->flags); cache_revisit_request(h); - if (rv == -EAGAIN) { - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); - cache_fresh_unlocked(h, detail); - rv = -ENOENT; - } + rv = try_to_negate_entry(detail, h); break; - case -EAGAIN: clear_bit(CACHE_PENDING, &h->flags); cache_revisit_request(h); -- cgit v1.2.3-58-ga151 From fdef7aa5d4020fd94ffcbf0078d6bd9e5a111e19 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 4 Jan 2011 14:12:47 -0500 Subject: svcrpc: ensure cache_check caller sees updated entry Supposes cache_check runs simultaneously with an update on a different CPU: cache_check task doing update ^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^ 1. test for CACHE_VALID 1'. set entry->data & !CACHE_NEGATIVE 2. use entry->data 2'. set CACHE_VALID If the two memory writes performed in step 1' and 2' appear misordered with respect to the reads in step 1 and 2, then the caller could get stale data at step 2 even though it saw CACHE_VALID set on the cache entry. Add memory barriers to prevent this. Reviewed-by: NeilBrown Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index a6c57334fa13..72ad836e4fe0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -128,6 +128,7 @@ static void cache_fresh_locked(struct cache_head *head, time_t expiry) { head->expiry_time = expiry; head->last_refresh = seconds_since_boot(); + smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } @@ -208,8 +209,16 @@ static inline int cache_is_valid(struct cache_detail *detail, struct cache_head /* entry is valid */ if (test_bit(CACHE_NEGATIVE, &h->flags)) return -ENOENT; - else + else { + /* + * In combination with write barrier in + * sunrpc_cache_update, ensures that anyone + * using the cache entry after this sees the + * updated contents: + */ + smp_rmb(); return 0; + } } } -- cgit v1.2.3-58-ga151 From d75faea330dbd1873c9094e9926ae306590c0998 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 30 Nov 2010 19:15:01 -0500 Subject: rpc: move sk_bc_xprt to svc_xprt This seems obviously transport-level information even if it's currently used only by the server socket code. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + include/linux/sunrpc/svcsock.h | 1 - net/sunrpc/svcsock.c | 10 ++++++---- net/sunrpc/xprtsock.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index c8f81da15c7e..7ad9751a0d87 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -79,6 +79,7 @@ struct svc_xprt { struct list_head xpt_users; /* callbacks on free */ struct net *xpt_net; + struct rpc_xprt *xpt_bc_xprt; /* NFSv4.1 backchannel */ }; static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 1b353a76c304..04dba23c59f2 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,7 +28,6 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ - struct rpc_xprt *sk_bc_xprt; /* NFSv4.1 backchannel xprt */ }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 52bd1130e83a..3bb400f86eae 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -987,15 +987,17 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, vec[0] = rqstp->rq_arg.head[0]; } else { /* REPLY */ - if (svsk->sk_bc_xprt) - req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; + + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); if (!req) { printk(KERN_NOTICE "%s: Got unrecognized reply: " - "calldir 0x%x sk_bc_xprt %p xid %08x\n", + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", __func__, ntohl(calldir), - svsk->sk_bc_xprt, xid); + bc_xprt, xid); vec[0] = rqstp->rq_arg.head[0]; goto out; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dfcab5ac65af..18dc42eb5597 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2379,9 +2379,9 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) * The backchannel uses the same socket connection as the * forechannel */ + args->bc_xprt->xpt_bc_xprt = xprt; xprt->bc_xprt = args->bc_xprt; bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); - bc_sock->sk_bc_xprt = xprt; transport->sock = bc_sock->sk_sock; transport->inet = bc_sock->sk_sk; -- cgit v1.2.3-58-ga151 From 99de8ea962bbc11a51ad4c52e3dc93bee5f6ba70 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Dec 2010 12:45:44 -0500 Subject: rpc: keep backchannel xprt as long as server connection Multiple backchannels can share the same tcp connection; from rfc 5661 section 2.10.3.1: A connection's association with a session is not exclusive. A connection associated with the channel(s) of one session may be simultaneously associated with the channel(s) of other sessions including sessions associated with other client IDs. However, multiple backchannels share a connection, they must all share the same xid stream (hence the same rpc_xprt); the only way we have to match replies with calls at the rpc layer is using the xid. So, keep the rpc_xprt around as long as the connection lasts, in case we're asked to use the connection as a backchannel again. Requests to create new backchannel clients over a given server connection should results in creating new clients that reuse the existing rpc_xprt. But to start, just reject attempts to associate multiple rpc_xprt's with the same underlying bc_xprt. Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 4 ++++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 34 ++++++++++++++++++++++++---------- 3 files changed, 29 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 173f3b975d49..ab86b7927f84 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -13,6 +13,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -128,6 +129,9 @@ static void svc_xprt_free(struct kref *kref) if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) svcauth_unix_info_release(xprt); put_net(xprt->xpt_net); + /* See comment on corresponding get in xs_setup_bc_tcp(): */ + if (xprt->xpt_bc_xprt) + xprt_put(xprt->xpt_bc_xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4c8f18aff7c3..749ad15ae305 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -965,6 +965,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; + kref_init(&xprt->kref); xprt->max_reqs = max_req; xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); @@ -1102,7 +1103,6 @@ found: return xprt; } - kref_init(&xprt->kref); spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 18dc42eb5597..0ef4dd4414ec 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2375,16 +2375,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->reestablish_timeout = 0; xprt->idle_timeout = 0; - /* - * The backchannel uses the same socket connection as the - * forechannel - */ - args->bc_xprt->xpt_bc_xprt = xprt; - xprt->bc_xprt = args->bc_xprt; - bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); - transport->sock = bc_sock->sk_sock; - transport->inet = bc_sock->sk_sk; - xprt->ops = &bc_tcp_ops; switch (addr->sa_family) { @@ -2406,6 +2396,29 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT], xprt->address_strings[RPC_DISPLAY_PROTO]); + /* + * The backchannel uses the same socket connection as the + * forechannel + */ + if (args->bc_xprt->xpt_bc_xprt) { + /* XXX: actually, want to catch this case... */ + ret = ERR_PTR(-EINVAL); + goto out_err; + } + /* + * Once we've associated a backchannel xprt with a connection, + * we want to keep it around as long as long as the connection + * lasts, in case we need to start using it for a backchannel + * again; this reference won't be dropped until bc_xprt is + * destroyed. + */ + xprt_get(xprt); + args->bc_xprt->xpt_bc_xprt = xprt; + xprt->bc_xprt = args->bc_xprt; + bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); + transport->sock = bc_sock->sk_sock; + transport->inet = bc_sock->sk_sk; + /* * Since we don't want connections for the backchannel, we set * the xprt status to connected @@ -2415,6 +2428,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) if (try_module_get(THIS_MODULE)) return xprt; + xprt_put(xprt); ret = ERR_PTR(-EINVAL); out_err: xprt_free(xprt); -- cgit v1.2.3-58-ga151 From f0418aa4b1103f959d64dc18273efa04ee0140e9 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Dec 2010 13:48:19 -0500 Subject: rpc: allow xprt_class->setup to return a preexisting xprt This allows us to reuse the xprt associated with a server connection if one has already been set up. Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtsock.c | 18 +++++++++--------- 3 files changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 89d10d279a20..bef0f535f746 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -321,6 +321,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) +#define XPRT_INITIALIZED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 749ad15ae305..856274d7e85c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1102,6 +1102,9 @@ found: -PTR_ERR(xprt)); return xprt; } + if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) + /* ->setup returned a pre-initialized xprt: */ + return xprt; spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0ef4dd4414ec..ee091c869fcd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2359,6 +2359,15 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct svc_sock *bc_sock; struct rpc_xprt *ret; + if (args->bc_xprt->xpt_bc_xprt) { + /* + * This server connection already has a backchannel + * export; we can't create a new one, as we wouldn't be + * able to match replies based on xid any more. So, + * reuse the already-existing one: + */ + return args->bc_xprt->xpt_bc_xprt; + } xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; @@ -2396,15 +2405,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT], xprt->address_strings[RPC_DISPLAY_PROTO]); - /* - * The backchannel uses the same socket connection as the - * forechannel - */ - if (args->bc_xprt->xpt_bc_xprt) { - /* XXX: actually, want to catch this case... */ - ret = ERR_PTR(-EINVAL); - goto out_err; - } /* * Once we've associated a backchannel xprt with a connection, * we want to keep it around as long as long as the connection -- cgit v1.2.3-58-ga151