From 1bde5ac49398a064c753bb490535cfad89e99a5f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Dec 2010 09:32:46 -0800 Subject: tcp: fix listening_get_next() Alexey Vlasov found /proc/net/tcp could sometime loop and display millions of sockets in LISTEN state. In 2.6.29, when we converted TCP hash tables to RCU, we left two sk_next() calls in listening_get_next(). We must instead use sk_nulls_next() to properly detect an end of chain. Reported-by: Alexey Vlasov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e13da6de1fc7..d978bb2f748b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2030,7 +2030,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) get_req: req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; } - sk = sk_next(st->syn_wait_sk); + sk = sk_nulls_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { @@ -2039,7 +2039,7 @@ get_req: if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - sk = sk_next(sk); + sk = sk_nulls_next(sk); } get_sk: sk_nulls_for_each_from(sk, node) { -- cgit v1.2.3-58-ga151 From e058464990c2ef1f3ecd6b83a154913c3c06f02a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 23 Dec 2010 12:03:57 -0800 Subject: Revert "ipv4: Allow configuring subnets as local addresses" This reverts commit 4465b469008bc03b98a1b8df4e9ae501b6c69d4b. Conflicts: net/ipv4/fib_frontend.c As reported by Ben Greear, this causes regressions: > Change 4465b469008bc03b98a1b8df4e9ae501b6c69d4b caused rules > to stop matching the input device properly because the > FLOWI_FLAG_MATCH_ANY_IIF is always defined in ip_dev_find(). > > This breaks rules such as: > > ip rule add pref 512 lookup local > ip rule del pref 0 lookup local > ip link set eth2 up > ip -4 addr add 172.16.0.102/24 broadcast 172.16.0.255 dev eth2 > ip rule add to 172.16.0.102 iif eth2 lookup local pref 10 > ip rule add iif eth2 lookup 10001 pref 20 > ip route add 172.16.0.0/24 dev eth2 table 10001 > ip route add unreachable 0/0 table 10001 > > If you had a second interface 'eth0' that was on a different > subnet, pinging a system on that interface would fail: > > [root@ct503-60 ~]# ping 192.168.100.1 > connect: Invalid argument Reported-by: Ben Greear Signed-off-by: David S. Miller --- include/net/flow.h | 1 - net/core/fib_rules.c | 3 +-- net/ipv4/fib_frontend.c | 10 ++++++++-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/flow.h b/include/net/flow.h index 0ac3fb5e0973..bb08692a20b0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,7 +49,6 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 82a4369ae150..a20e5d3bbfa0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif) && - !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF)) + if (rule->iifindex && (rule->iifindex != fl->iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->oif)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb6f69a8f27a..c19c1f739fba 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) .daddr = addr } }, - .flags = FLOWI_FLAG_MATCH_ANY_IIF }; struct fib_result res = { 0 }; struct net_device *dev = NULL; + struct fib_table *local_table; + +#ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; +#endif rcu_read_lock(); - if (fib_lookup(net, &fl, &res)) { + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (!local_table || + fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { rcu_read_unlock(); return NULL; } -- cgit v1.2.3-58-ga151 From fc75fc8339e7727167443469027540b283daac71 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Dec 2010 04:39:39 +0000 Subject: ipv4: dont create routes on down devices In ip_route_output_slow(), instead of allowing a route to be created on a not UPed device, report -ENETUNREACH immediately. # ip tunnel add mode ipip remote 10.16.0.164 local 10.16.0.72 dev eth0 # (Note : tunl1 is down) # ping -I tunl1 10.1.2.3 PING 10.1.2.3 (10.1.2.3) from 192.168.18.5 tunl1: 56(84) bytes of data. (nothing) # ./a.out tunl1 # ip tunnel del tunl1 Message from syslogd@shelby at Dec 22 10:12:08 ... kernel: unregister_netdevice: waiting for tunl1 to become free. Usage count = 3 After patch: # ping -I tunl1 10.1.2.3 connect: Network is unreachable Reported-by: Nicolas Dichtel Signed-off-by: Eric Dumazet Reviewed-by: Octavian Purdila Signed-off-by: David S. Miller --- net/ipv4/route.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 987bf9adb318..df948b0f1ac9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2585,9 +2585,10 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, goto out; /* RACE: Check return value of inet_select_addr instead. */ - if (rcu_dereference(dev_out->ip_ptr) == NULL) - goto out; /* Wrong error code */ - + if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { + err = -ENETUNREACH; + goto out; + } if (ipv4_is_local_multicast(oldflp->fl4_dst) || ipv4_is_lbcast(oldflp->fl4_dst)) { if (!fl.fl4_src) -- cgit v1.2.3-58-ga151