From 427345d61297f5a501114b2c02e6af06a9768788 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 13 Feb 2017 22:26:49 +0100
Subject: netfilter: nft_ct: fix random validation errors for zone set support

Dan reports:
 net/netfilter/nft_ct.c:549 nft_ct_set_init()
 error: uninitialized symbol 'len'.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: edee4f1e924582 ("netfilter: nft_ct: add zone id set support")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_ct.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index c6b8022c0e47..bf548a7a71ec 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -528,6 +528,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
 		if (!nft_ct_tmpl_alloc_pcpu())
 			return -ENOMEM;
 		nft_ct_pcpu_template_refcnt++;
+		len = sizeof(u16);
 		break;
 #endif
 	default:
-- 
cgit v1.2.3-58-ga151


From dfcb9f4f99f1e9a49e43398a7bfbf56927544af1 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Thu, 23 Feb 2017 09:31:18 -0300
Subject: sctp: deny peeloff operation on asocs with threads sleeping on it

commit 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf")
attempted to avoid a BUG_ON call when the association being used for a
sendmsg() is blocked waiting for more sndbuf and another thread did a
peeloff operation on such asoc, moving it to another socket.

As Ben Hutchings noticed, in such a case it would return without
re-locking the socket, which would cause two unlocks in a row.

Further analysis also revealed that it could allow a double free if the
application managed to peeloff the asoc that is created during the
sendmsg call, because then sctp_sendmsg() would try to free the asoc
that was created only for that call.

This patch takes another approach. It will deny the peeloff operation
if there is a thread sleeping on the asoc, so this situation doesn't
exist anymore. This avoids the issues described above and also honors
the syscalls that are already being handled (it can be multiple sendmsg
calls).

Joint work with Xin Long.

Fixes: 2dcab5984841 ("sctp: avoid BUG_ON on sctp_wait_for_sndbuf")
Cc: Alexander Popov <alex.popov@linux.com>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b5321486fbed..465a9c8464f9 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4862,6 +4862,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
 	if (!asoc)
 		return -EINVAL;
 
+	/* If there is a thread waiting on more sndbuf space for
+	 * sending on this asoc, it cannot be peeled.
+	 */
+	if (waitqueue_active(&asoc->wait))
+		return -EBUSY;
+
 	/* An association cannot be branched off from an already peeled-off
 	 * socket, nor is this supported for tcp style sockets.
 	 */
@@ -7599,8 +7605,6 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		 */
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
-		if (sk != asoc->base.sk)
-			goto do_error;
 		lock_sock(sk);
 
 		*timeo_p = current_timeo;
-- 
cgit v1.2.3-58-ga151


From 77cc7aee099e7a7843835995101379405564f775 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Thu, 23 Feb 2017 20:47:01 +0800
Subject: RDS: IB: fix ifnullfree.cocci warnings

net/rds/ib.c:115:2-7: WARNING: NULL check before freeing functions like kfree, debugfs_remove, debugfs_remove_recursive or usb_free_urb is not needed. Maybe consider reorganizing relevant code to avoid passing NULL values.

 NULL check before some freeing functions is not needed.

 Based on checkpatch warning
 "kfree(NULL) is safe this check is probably not required"
 and kfreeaddr.cocci by Julia Lawall.

Generated by: scripts/coccinelle/free/ifnullfree.cocci

Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8d70884d7bb6..91fe46f1e4cc 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -111,8 +111,7 @@ static void rds_ib_dev_free(struct work_struct *work)
 		kfree(i_ipaddr);
 	}
 
-	if (rds_ibdev->vector_load)
-		kfree(rds_ibdev->vector_load);
+	kfree(rds_ibdev->vector_load);
 
 	kfree(rds_ibdev);
 }
-- 
cgit v1.2.3-58-ga151


From 681a55d71799b575f46fe94121728cf67460d1c3 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Thu, 23 Feb 2017 11:10:31 -0500
Subject: tipc: move premature initialization of stack variables

In the function tipc_rcv() we initialize a couple of stack variables
from the message header before that same header has been validated.
In rare cases when the arriving header is non-linear, the validation
function itself may linearize the buffer by calling pskb_may_pull(),
while the wrongly initialized stack fields are not updated accordingly.

We fix this in this commit.

Reported-by: Matthew Wong <mwong@sonusnet.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/node.c b/net/tipc/node.c
index e9295fa3a554..4512e83652b1 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1505,19 +1505,21 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 {
 	struct sk_buff_head xmitq;
 	struct tipc_node *n;
-	struct tipc_msg *hdr = buf_msg(skb);
-	int usr = msg_user(hdr);
+	struct tipc_msg *hdr;
 	int bearer_id = b->identity;
 	struct tipc_link_entry *le;
-	u16 bc_ack = msg_bcast_ack(hdr);
 	u32 self = tipc_own_addr(net);
-	int rc = 0;
+	int usr, rc = 0;
+	u16 bc_ack;
 
 	__skb_queue_head_init(&xmitq);
 
-	/* Ensure message is well-formed */
+	/* Ensure message is well-formed before touching the header */
 	if (unlikely(!tipc_msg_validate(skb)))
 		goto discard;
+	hdr = buf_msg(skb);
+	usr = msg_user(hdr);
+	bc_ack = msg_bcast_ack(hdr);
 
 	/* Handle arrival of discovery or broadcast packet */
 	if (unlikely(msg_non_seq(hdr))) {
-- 
cgit v1.2.3-58-ga151


From 774521f353e1df1eeb1751775a924e7807465abe Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Fri, 24 Feb 2017 12:33:09 +0000
Subject: rxrpc: Fix an assertion in rxrpc_read()

In the rxrpc_read() function, which allows a user to read the contents of a
key, we miscalculate the expected length of an encoded rxkad token by not
taking into account the key length.  However, the data is stored later
anyway with an ENCODE_DATA() call - and an assertion failure then ensues
when the lengths are checked at the end.

Fix this by including the key length in the token size estimation.

The following assertion is produced:

Assertion failed - 384(0x180) == 380(0x17c) is false
------------[ cut here ]------------
kernel BUG at ../net/rxrpc/key.c:1221!
invalid opcode: 0000 [#1] SMP
Modules linked in:
CPU: 2 PID: 2957 Comm: keyctl Not tainted 4.10.0-fscache+ #483
Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014
task: ffff8804013a8500 task.stack: ffff8804013ac000
RIP: 0010:rxrpc_read+0x10de/0x11b6
RSP: 0018:ffff8804013afe48 EFLAGS: 00010296
RAX: 000000000000003b RBX: 0000000000000003 RCX: 0000000000000000
RDX: 0000000000040001 RSI: 00000000000000f6 RDI: 0000000000000300
RBP: ffff8804013afed8 R08: 0000000000000001 R09: 0000000000000001
R10: ffff8804013afd90 R11: 0000000000000002 R12: 00005575f7c911b4
R13: 00005575f7c911b3 R14: 0000000000000157 R15: ffff880408a5d640
FS:  00007f8dfbc73700(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005575f7c91008 CR3: 000000040120a000 CR4: 00000000001406e0
Call Trace:
 keyctl_read_key+0xb6/0xd7
 SyS_keyctl+0x83/0xe7
 do_syscall_64+0x80/0x191
 entry_SYSCALL64_slow_path+0x25/0x25

Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 18c737a61d80..0a4e28477ad9 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -1065,7 +1065,7 @@ static long rxrpc_read(const struct key *key,
 
 		switch (token->security_index) {
 		case RXRPC_SECURITY_RXKAD:
-			toksize += 8 * 4;	/* viceid, kvno, key*2, begin,
+			toksize += 9 * 4;	/* viceid, kvno, key*2 + len, begin,
 						 * end, primary, tktlen */
 			toksize += RND(token->kad->ticket_len);
 			break;
-- 
cgit v1.2.3-58-ga151


From 7dcdf941cdc96692ab99fd790c8cc68945514851 Mon Sep 17 00:00:00 2001
From: David Forster <dforster@brocade.com>
Date: Fri, 24 Feb 2017 14:20:32 +0000
Subject: vti6: return GRE_KEY for vti6

Align vti6 with vti by returning GRE_KEY flag. This enables iproute2
to display tunnel keys on "ip -6 tunnel show"

Signed-off-by: David Forster <dforster@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_vti.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index c795fee372c4..644ba59fbd9d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -693,6 +693,10 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
 	u->link = p->link;
 	u->i_key = p->i_key;
 	u->o_key = p->o_key;
+	if (u->i_key)
+		u->i_flags |= GRE_KEY;
+	if (u->o_key)
+		u->o_flags |= GRE_KEY;
 	u->proto = p->proto;
 
 	memcpy(u->name, p->name, sizeof(u->name));
-- 
cgit v1.2.3-58-ga151


From 3b5923f0796b13e731802d40faf62d2c59d98e48 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Fri, 24 Feb 2017 04:28:01 -0500
Subject: rds: fix memory leak error

When the function register_netdevice_notifier fails, the memory
allocated by kmem_cache_create should be freed by the function
kmem_cache_destroy.

Cc: Joe Jin <joe.jin@oracle.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 57bb52361e0f..5438f6725092 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -641,12 +641,12 @@ static int rds_tcp_init(void)
 	ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
 	if (ret) {
 		pr_warn("could not register rds_tcp_dev_notifier\n");
-		goto out;
+		goto out_slab;
 	}
 
 	ret = register_pernet_subsys(&rds_tcp_net_ops);
 	if (ret)
-		goto out_slab;
+		goto out_notifier;
 
 	ret = rds_tcp_recv_init();
 	if (ret)
@@ -664,9 +664,10 @@ out_recv:
 	rds_tcp_recv_exit();
 out_pernet:
 	unregister_pernet_subsys(&rds_tcp_net_ops);
-out_slab:
+out_notifier:
 	if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
 		pr_warn("could not unregister rds_tcp_dev_notifier\n");
+out_slab:
 	kmem_cache_destroy(rds_tcp_conn_slab);
 out:
 	return ret;
-- 
cgit v1.2.3-58-ga151


From 7fb668ac7b498f7e8ba2de373fe8a8648bd39a4c Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jarno@ovn.org>
Date: Thu, 23 Feb 2017 17:08:53 -0800
Subject: netfilter: nf_ct_expect: nf_ct_expect_related_report(): Return zero
 on success.

Commit 4dee62b1b9b4 ("netfilter: nf_ct_expect: nf_ct_expect_insert()
returns void") inadvertently changed the successful return value of
nf_ct_expect_related_report() from 0 to 1, which caused the openvswitch
conntrack integration to fail in FTP test cases.

Fix this by always returning zero on the success code path.

Fixes: 4dee62b1b9b4 ("netfilter: nf_ct_expect: nf_ct_expect_insert() returns void")
Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index e19a69787d99..d6ace69d57dc 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -467,7 +467,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 
 	spin_unlock_bh(&nf_conntrack_expect_lock);
 	nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);
-	return ret;
+	return 0;
 out:
 	spin_unlock_bh(&nf_conntrack_expect_lock);
 	return ret;
-- 
cgit v1.2.3-58-ga151


From 8bcfd0925ef15f072ba1e7bee2c25e9e1b5fd6ca Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 26 Feb 2017 15:50:52 +0200
Subject: ipv4: add missing initialization for flowi4_uid

Avoid matching of random stack value for uid when rules
are looked up on input route or when RP filter is used.
Problem should affect only setups that use ip rules with
uid range.

Fixes: 622ec2c9d524 ("net: core: add UID to flows, rules, and routes")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/fib_frontend.c | 6 +++---
 net/ipv4/route.c        | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7db2ad2e82d3..b39a791f6756 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	int ret, no_addr;
 	struct fib_result res;
 	struct flowi4 fl4;
-	struct net *net;
+	struct net *net = dev_net(dev);
 	bool dev_match;
 
 	fl4.flowi4_oif = 0;
@@ -332,6 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
 	fl4.flowi4_tun_key.tun_id = 0;
 	fl4.flowi4_flags = 0;
+	fl4.flowi4_uid = sock_net_uid(net, NULL);
 
 	no_addr = idev->ifa_list == NULL;
 
@@ -339,13 +340,12 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 
 	trace_fib_validate_source(dev, &fl4);
 
-	net = dev_net(dev);
 	if (fib_lookup(net, &fl4, &res, 0))
 		goto last_resort;
 	if (res.type != RTN_UNICAST &&
 	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
 		goto e_inval;
-	if (!rpf && !fib_num_tclassid_users(dev_net(dev)) &&
+	if (!rpf && !fib_num_tclassid_users(net) &&
 	    (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
 		goto last_resort;
 	fib_combine_itag(itag, &res);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb494a5050f7..584ed667bfa4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1876,6 +1876,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.flowi4_flags = 0;
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
+	fl4.flowi4_uid = sock_net_uid(net, NULL);
 	err = fib_lookup(net, &fl4, &res, 0);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
-- 
cgit v1.2.3-58-ga151


From 6e28099d38c0e50d62c1afc054e37e573adf3d21 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sun, 26 Feb 2017 17:14:35 +0200
Subject: ipv4: mask tos for input route

Restore the lost masking of TOS in input route code to
allow ip rules to match it properly.

Problem [1] noticed by Shmulik Ladkani <shmulik.ladkani@gmail.com>

[1] http://marc.info/?t=137331755300040&r=1&w=2

Fixes: 89aef8921bfb ("ipv4: Delete routing cache.")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 584ed667bfa4..8471dd116771 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2009,6 +2009,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
 	int res;
 
+	tos &= IPTOS_RT_MASK;
 	rcu_read_lock();
 
 	/* Multicast recognition logic is moved from route cache to here.
-- 
cgit v1.2.3-58-ga151


From 4b86c459c7bee3acaf92f0e2b4c6ac803eaa1a58 Mon Sep 17 00:00:00 2001
From: Jarno Rajahalme <jarno@ovn.org>
Date: Thu, 23 Feb 2017 17:08:54 -0800
Subject: netfilter: nf_ct_expect: Change __nf_ct_expect_check() return value.

Commit 4dee62b1b9b4 ("netfilter: nf_ct_expect: nf_ct_expect_insert()
returns void") inadvertently changed the successful return value of
nf_ct_expect_related_report() from 0 to 1 due to
__nf_ct_expect_check() returning 1 on success.  Prevent this
regression in the future by changing the return value of
__nf_ct_expect_check() to 0 on success.

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
Acked-by: Joe Stringer <joe@ovn.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_conntrack_expect.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index d6ace69d57dc..4b2e1fb28bb4 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -410,7 +410,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *next;
 	unsigned int h;
-	int ret = 1;
+	int ret = 0;
 
 	if (!master_help) {
 		ret = -ESHUTDOWN;
@@ -460,7 +460,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 
 	spin_lock_bh(&nf_conntrack_expect_lock);
 	ret = __nf_ct_expect_check(expect);
-	if (ret <= 0)
+	if (ret < 0)
 		goto out;
 
 	nf_ct_expect_insert(expect);
-- 
cgit v1.2.3-58-ga151


From 13aa5a8f498dacd5f1a8e35be72af47e630fb8c6 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 26 Feb 2017 17:12:11 +0100
Subject: netfilter: nft_set_bitmap: incorrect bitmap size

priv->bitmap_size stores the real bitmap size, instead of the full
struct nft_bitmap object.

Fixes: 665153ff5752 ("netfilter: nf_tables: add bitmap set type")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_set_bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 97f9649bcc7e..152d226552c1 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -258,7 +258,7 @@ static int nft_bitmap_init(const struct nft_set *set,
 {
 	struct nft_bitmap *priv = nft_set_priv(set);
 
-	priv->bitmap_size = nft_bitmap_total_size(set->klen);
+	priv->bitmap_size = nft_bitmap_size(set->klen);
 
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From 2e3ce5bc2aa938653c3866aa7f4901a1f199b1c8 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 24 Feb 2017 15:18:46 +0800
Subject: sctp: set sin_port for addr param when checking duplicate address

Commit b8607805dd15 ("sctp: not copying duplicate addrs to the assoc's
bind address list") tried to check for duplicate address before copying
to asoc's bind_addr list from global addr list.

But all the addrs' sin_ports in global addr list are 0 while the addrs'
sin_ports are bp->port in asoc's bind_addr list. It means even if it's
a duplicate address, af->cmp_addr will still return 0 as their
sin_ports are different.

This patch is to fix it by setting the sin_port for addr param with
bp->port before comparing the addrs.

Fixes: b8607805dd15 ("sctp: not copying duplicate addrs to the assoc's bind address list")
Reported-by: Wei Chen <weichen@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/protocol.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8227bbbd077a..1b6d4574d2b0 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -199,6 +199,7 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
 			      sctp_scope_t scope, gfp_t gfp, int copy_flags)
 {
 	struct sctp_sockaddr_entry *addr;
+	union sctp_addr laddr;
 	int error = 0;
 
 	rcu_read_lock();
@@ -220,7 +221,10 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
 		     !(copy_flags & SCTP_ADDR6_PEERSUPP)))
 			continue;
 
-		if (sctp_bind_addr_state(bp, &addr->a) != -1)
+		laddr = addr->a;
+		/* also works for setting ipv6 address port */
+		laddr.v4.sin_port = htons(bp->port);
+		if (sctp_bind_addr_state(bp, &laddr) != -1)
 			continue;
 
 		error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a),
-- 
cgit v1.2.3-58-ga151


From 99253eb750fda6a644d5188fb26c43bad8d5a745 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Fri, 24 Feb 2017 16:29:06 +0800
Subject: ipv6: check sk sk_type and protocol early in ip_mroute_set/getsockopt

Commit 5e1859fbcc3c ("ipv4: ipmr: various fixes and cleanups") fixed
the issue for ipv4 ipmr:

  ip_mroute_setsockopt() & ip_mroute_getsockopt() should not
  access/set raw_sk(sk)->ipmr_table before making sure the socket
  is a raw socket, and protocol is IGMP

The same fix should be done for ipv6 ipmr as well.

This patch fixes the panic caused by overwriting whatever lies at the
same offset as ip6mr_table in raw6_sk(sk) when ip6_mroute_setsockopt()
is called on a socket of another type.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index babaf3ec2742..6ba6c900ebcf 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1666,6 +1666,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	struct net *net = sock_net(sk);
 	struct mr6_table *mrt;
 
+	if (sk->sk_type != SOCK_RAW ||
+	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+		return -EOPNOTSUPP;
+
 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
 	if (!mrt)
 		return -ENOENT;
@@ -1677,9 +1681,6 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 
 	switch (optname) {
 	case MRT6_INIT:
-		if (sk->sk_type != SOCK_RAW ||
-		    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
-			return -EOPNOTSUPP;
 		if (optlen < sizeof(int))
 			return -EINVAL;
 
@@ -1815,6 +1816,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 	struct net *net = sock_net(sk);
 	struct mr6_table *mrt;
 
+	if (sk->sk_type != SOCK_RAW ||
+	    inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
+		return -EOPNOTSUPP;
+
 	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
 	if (!mrt)
 		return -ENOENT;
-- 
cgit v1.2.3-58-ga151


From edb9d1bff4bbe19b8ae0e71b1f38732591a9eeb2 Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Fri, 24 Feb 2017 11:00:32 -0500
Subject: net sched actions: decrement module reference count after table
 flush.

When tc actions are loaded as a module and no actions have been installed,
flushing them would result in actions being removed from memory, but the
module's reference count not being decremented, so that the module could
not be unloaded.

Following is example with GACT action:

% sudo modprobe act_gact
% lsmod
Module                  Size  Used by
act_gact               16384  0
%
% sudo tc actions ls action gact
%
% sudo tc actions flush action gact
% lsmod
Module                  Size  Used by
act_gact               16384  1
% sudo tc actions flush action gact
% lsmod
Module                  Size  Used by
act_gact               16384  2
% sudo rmmod act_gact
rmmod: ERROR: Module act_gact is in use
....

After the fix:
% lsmod
Module                  Size  Used by
act_gact               16384  0
%
% sudo tc actions add action pass index 1
% sudo tc actions add action pass index 2
% sudo tc actions add action pass index 3
% lsmod
Module                  Size  Used by
act_gact               16384  3
%
% sudo tc actions flush action gact
% lsmod
Module                  Size  Used by
act_gact               16384  0
%
% sudo tc actions flush action gact
% lsmod
Module                  Size  Used by
act_gact               16384  0
% sudo rmmod act_gact
% lsmod
Module                  Size  Used by
%

Fixes: f97017cdefef ("net-sched: Fix actions flushing")
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f219ff325ed4..dfe64f81cb16 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -859,10 +859,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 		goto out_module_put;
 
 	err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
-	if (err < 0)
+	if (err <= 0)
 		goto out_module_put;
-	if (err == 0)
-		goto noflush_out;
 
 	nla_nest_end(skb, nest);
 
@@ -879,7 +877,6 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 out_module_put:
 	module_put(ops->owner);
 err_out:
-noflush_out:
 	kfree_skb(skb);
 	return err;
 }
-- 
cgit v1.2.3-58-ga151


From d7e15835ab278443da5c0cfe6573f8f793ed40a3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 24 Feb 2017 21:57:13 +0000
Subject: rxrpc: Kernel calls get stuck in recvmsg

Calls made through the in-kernel interface can end up getting stuck because
of a missed variable update in a loop in rxrpc_recvmsg_data().  The problem
is like this:

 (1) A new packet comes in and doesn't cause a notification to be given to
     the client as there's still another packet in the ring - the
     assumption being that the client will keep drawing off data until
     the ring is empty.

 (2) The client is in rxrpc_recvmsg_data(), inside the big while loop that
     iterates through the packets.  This copies the window pointers into
     variables rather than using the information in the call struct
     because:

     (a) MSG_PEEK might be in effect;

     (b) we need a barrier after reading call->rx_top to pair with the
     	 barrier in the softirq routine that loads the buffer.

 (3) The reading of call->rx_top is done outside of the loop, and top is
     never updated whilst we're in the loop.  This means that even though
     there's a new packet available, we don't see it and may return -EFAULT
     to the caller - who will happily return to the scheduler and await the
     next notification.

 (4) No further notifications are forthcoming until there's an abort as the
     ring isn't empty.

The fix is to move the read of call->rx_top inside the loop - but it needs
to be done before the condition is checked.

Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/recvmsg.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index c29362d50a92..f3a688e10843 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -320,8 +320,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
 
 	/* Barriers against rxrpc_input_data(). */
 	hard_ack = call->rx_hard_ack;
-	top = smp_load_acquire(&call->rx_top);
-	for (seq = hard_ack + 1; before_eq(seq, top); seq++) {
+	seq = hard_ack + 1;
+	while (top = smp_load_acquire(&call->rx_top),
+	       before_eq(seq, top)
+	       ) {
 		ix = seq & RXRPC_RXTX_BUFF_MASK;
 		skb = call->rxtx_buffer[ix];
 		if (!skb) {
@@ -394,6 +396,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
 			ret = 1;
 			goto out;
 		}
+
+		seq++;
 	}
 
 out:
-- 
cgit v1.2.3-58-ga151


From 37f1c63e3e29181ad412485eec2200cffc1dc70f Mon Sep 17 00:00:00 2001
From: Roman Mashak <mrv@mojatatu.com>
Date: Fri, 24 Feb 2017 17:36:58 -0500
Subject: net sched actions: do not overwrite status of action creation.

The return value of nla_memdup_cookie() was overwriting err, which is
declared at function scope and was earlier initialized with the result
of ->init(). On success nla_memdup_cookie() returns 0, and thus the
module refcnt was decremented even though the action was installed.

$ sudo tc actions add action pass index 1 cookie 1234
$ sudo tc actions ls action gact

        action order 0: gact action pass
         random type none pass val 0
         index 1 ref 1 bind 0
$
$ lsmod
Module                  Size  Used by
act_gact               16384  0
...
$
$ sudo rmmod act_gact
[   52.310283] ------------[ cut here ]------------
[   52.312551] WARNING: CPU: 1 PID: 455 at kernel/module.c:1113
module_put+0x99/0xa0
[   52.316278] Modules linked in: act_gact(-) crct10dif_pclmul crc32_pclmul
ghash_clmulni_intel psmouse pcbc evbug aesni_intel aes_x86_64 crypto_simd
serio_raw glue_helper pcspkr cryptd
[   52.322285] CPU: 1 PID: 455 Comm: rmmod Not tainted 4.10.0+ #11
[   52.324261] Call Trace:
[   52.325132]  dump_stack+0x63/0x87
[   52.326236]  __warn+0xd1/0xf0
[   52.326260]  warn_slowpath_null+0x1d/0x20
[   52.326260]  module_put+0x99/0xa0
[   52.326260]  tcf_hashinfo_destroy+0x7f/0x90
[   52.326260]  gact_exit_net+0x27/0x40 [act_gact]
[   52.326260]  ops_exit_list.isra.6+0x38/0x60
[   52.326260]  unregister_pernet_operations+0x90/0xe0
[   52.326260]  unregister_pernet_subsys+0x21/0x30
[   52.326260]  tcf_unregister_action+0x68/0xa0
[   52.326260]  gact_cleanup_module+0x17/0xa0f [act_gact]
[   52.326260]  SyS_delete_module+0x1ba/0x220
[   52.326260]  entry_SYSCALL_64_fastpath+0x1e/0xad
[   52.326260] RIP: 0033:0x7f527ffae367
[   52.326260] RSP: 002b:00007ffeb402a598 EFLAGS: 00000202 ORIG_RAX:
00000000000000b0
[   52.326260] RAX: ffffffffffffffda RBX: 0000559b069912a0 RCX: 00007f527ffae367
[   52.326260] RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559b06991308
[   52.326260] RBP: 0000000000000003 R08: 00007f5280264420 R09: 00007ffeb4029511
[   52.326260] R10: 000000000000087b R11: 0000000000000202 R12: 00007ffeb4029580
[   52.326260] R13: 0000000000000000 R14: 0000000000000000 R15: 0000559b069912a0
[   52.354856] ---[ end trace 90d89401542b0db6 ]---
$

With the fix:

$ sudo modprobe act_gact
$ lsmod
Module                  Size  Used by
act_gact               16384  0
...
$ sudo tc actions add action pass index 1 cookie 1234
$ sudo tc actions ls action gact

        action order 0: gact action pass
         random type none pass val 0
         index 1 ref 1 bind 0
$
$ lsmod
Module                  Size  Used by
act_gact               16384  1
...
$ sudo rmmod act_gact
rmmod: ERROR: Module act_gact is in use
$
$ sudo /home/mrv/bin/tc actions del action gact index 1
$ sudo rmmod act_gact
$ lsmod
Module                  Size  Used by
$

Fixes: 1045ba77a ("net sched actions: Add support for user cookies")
Signed-off-by: Roman Mashak <mrv@mojatatu.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index dfe64f81cb16..b70aa57319ea 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -613,8 +613,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
 			goto err_mod;
 		}
 
-		err = nla_memdup_cookie(a, tb);
-		if (err < 0) {
+		if (nla_memdup_cookie(a, tb) < 0) {
+			err = -ENOMEM;
 			tcf_hash_release(a, bind);
 			goto err_mod;
 		}
-- 
cgit v1.2.3-58-ga151


From 1ecc9ad02c3d4cf44bc94bffcb3b12e7861b00a7 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 25 Feb 2017 17:57:43 +0200
Subject: xfrm: provide correct dst in xfrm_neigh_lookup

Fix xfrm_neigh_lookup to provide dst->path to the
neigh_lookup dst_ops method.

When skb is provided, the IP address in packet should already
match the dst->path address family. But for the non-skb case,
we should consider the last tunnel address as nexthop address.

Fixes: f894cbf847c9 ("net: Add optional SKB arg to dst_ops->neigh_lookup().")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5f3e87866438..0806dccdf507 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2836,14 +2836,8 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst)
 	return mtu ? : dst_mtu(dst->path);
 }
 
-static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
-					   struct sk_buff *skb,
-					   const void *daddr)
-{
-	return dst->path->ops->neigh_lookup(dst, skb, daddr);
-}
-
-static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
+					const void *daddr)
 {
 	const struct dst_entry *path = dst->path;
 
@@ -2857,6 +2851,25 @@ static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
 		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
 			daddr = &xfrm->id.daddr;
 	}
+	return daddr;
+}
+
+static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
+					   struct sk_buff *skb,
+					   const void *daddr)
+{
+	const struct dst_entry *path = dst->path;
+
+	if (!skb)
+		daddr = xfrm_get_dst_nexthop(dst, daddr);
+	return path->ops->neigh_lookup(path, skb, daddr);
+}
+
+static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
+{
+	const struct dst_entry *path = dst->path;
+
+	daddr = xfrm_get_dst_nexthop(dst, daddr);
 	path->ops->confirm_neigh(path, daddr);
 }
 
-- 
cgit v1.2.3-58-ga151


From 51fb60eb162ab84c5edf2ae9c63cf0b878e5547e Mon Sep 17 00:00:00 2001
From: Paul Hüber <phueber@kernsp.in>
Date: Sun, 26 Feb 2017 17:58:19 +0100
Subject: l2tp: avoid use-after-free caused by l2tp_ip_backlog_recv
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

l2tp_ip_backlog_recv must not return -1 when the packet gets dropped.
The return value is passed up to ip_local_deliver_finish, which treats
negative values as an IP protocol number for resubmission - of an skb
that has already been freed on the drop path.

Signed-off-by: Paul Hüber <phueber@kernsp.in>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index c59712057dc8..d25038cfd64e 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -388,7 +388,7 @@ static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
 drop:
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_INDISCARDS);
 	kfree_skb(skb);
-	return -1;
+	return 0;
 }
 
 /* Userspace will call sendmsg() on the tunnel socket to send L2TP
-- 
cgit v1.2.3-58-ga151