Diffstat (limited to 'net')
465 files changed, 15767 insertions, 10064 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c index ae1896fa45e2..83b19e072224 100644 --- a/net/6lowpan/core.c +++ b/net/6lowpan/core.c @@ -17,6 +17,11 @@ void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype) { + dev->addr_len = EUI64_ADDR_LEN; + dev->type = ARPHRD_6LOWPAN; + dev->mtu = IPV6_MIN_MTU; + dev->priv_flags |= IFF_NO_QUEUE; + lowpan_priv(dev)->lltype = lltype; } EXPORT_SYMBOL(lowpan_netdev_setup); diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c index 1e0071fdcf72..346b5c1a9185 100644 --- a/net/6lowpan/iphc.c +++ b/net/6lowpan/iphc.c @@ -49,36 +49,178 @@ #include <linux/bitops.h> #include <linux/if_arp.h> #include <linux/netdevice.h> + #include <net/6lowpan.h> #include <net/ipv6.h> -#include <net/af_ieee802154.h> + +/* special link-layer handling */ +#include <net/mac802154.h> #include "nhc.h" +/* Values of fields within the IPHC encoding first byte */ +#define LOWPAN_IPHC_TF_MASK 0x18 +#define LOWPAN_IPHC_TF_00 0x00 +#define LOWPAN_IPHC_TF_01 0x08 +#define LOWPAN_IPHC_TF_10 0x10 +#define LOWPAN_IPHC_TF_11 0x18 + +#define LOWPAN_IPHC_NH 0x04 + +#define LOWPAN_IPHC_HLIM_MASK 0x03 +#define LOWPAN_IPHC_HLIM_00 0x00 +#define LOWPAN_IPHC_HLIM_01 0x01 +#define LOWPAN_IPHC_HLIM_10 0x02 +#define LOWPAN_IPHC_HLIM_11 0x03 + +/* Values of fields within the IPHC encoding second byte */ +#define LOWPAN_IPHC_CID 0x80 + +#define LOWPAN_IPHC_SAC 0x40 + +#define LOWPAN_IPHC_SAM_MASK 0x30 +#define LOWPAN_IPHC_SAM_00 0x00 +#define LOWPAN_IPHC_SAM_01 0x10 +#define LOWPAN_IPHC_SAM_10 0x20 +#define LOWPAN_IPHC_SAM_11 0x30 + +#define LOWPAN_IPHC_M 0x08 + +#define LOWPAN_IPHC_DAC 0x04 + +#define LOWPAN_IPHC_DAM_MASK 0x03 +#define LOWPAN_IPHC_DAM_00 0x00 +#define LOWPAN_IPHC_DAM_01 0x01 +#define LOWPAN_IPHC_DAM_10 0x02 +#define LOWPAN_IPHC_DAM_11 0x03 + +/* ipv6 address based on mac + * second bit-flip (Universe/Local) is done according RFC2464 + */ +#define is_addr_mac_addr_based(a, m) \ + ((((a)->s6_addr[8]) == (((m)[0]) ^ 0x02)) && \ + (((a)->s6_addr[9]) == (m)[1]) && \ + (((a)->s6_addr[10]) == (m)[2]) && \ + (((a)->s6_addr[11]) == (m)[3]) && \ + (((a)->s6_addr[12]) == (m)[4]) && \ + (((a)->s6_addr[13]) == (m)[5]) && \ + (((a)->s6_addr[14]) == (m)[6]) && \ + (((a)->s6_addr[15]) == (m)[7])) + +/* check whether we can compress the IID to 16 bits, + * it's possible for unicast addresses with first 49 bits are zero only. 
+ */ +#define lowpan_is_iid_16_bit_compressable(a) \ + ((((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0) && \ + (((a)->s6_addr[11]) == 0xff) && \ + (((a)->s6_addr[12]) == 0xfe) && \ + (((a)->s6_addr[13]) == 0)) + +/* check whether the 112-bit gid of the multicast address is mappable to: */ + +/* 48 bits, FFXX::00XX:XXXX:XXXX */ +#define lowpan_is_mcast_addr_compressable48(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr[10]) == 0)) + +/* 32 bits, FFXX::00XX:XXXX */ +#define lowpan_is_mcast_addr_compressable32(a) \ + ((((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr[12]) == 0)) + +/* 8 bits, FF02::00XX */ +#define lowpan_is_mcast_addr_compressable8(a) \ + ((((a)->s6_addr[1]) == 2) && \ + (((a)->s6_addr16[1]) == 0) && \ + (((a)->s6_addr16[2]) == 0) && \ + (((a)->s6_addr16[3]) == 0) && \ + (((a)->s6_addr16[4]) == 0) && \ + (((a)->s6_addr16[5]) == 0) && \ + (((a)->s6_addr16[6]) == 0) && \ + (((a)->s6_addr[14]) == 0)) + +static inline void iphc_uncompress_eui64_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + /* fe:80::XXXX:XXXX:XXXX:XXXX + * \_________________/ + * hwaddr + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + memcpy(&ipaddr->s6_addr[8], lladdr, EUI64_ADDR_LEN); + /* second bit-flip (Universe/Local) + * is done according RFC2464 + */ + ipaddr->s6_addr[8] ^= 0x02; +} + +static inline void iphc_uncompress_802154_lladdr(struct in6_addr *ipaddr, + const void *lladdr) +{ + const struct ieee802154_addr *addr = lladdr; + u8 eui64[EUI64_ADDR_LEN] = { }; + + switch (addr->mode) { + case IEEE802154_ADDR_LONG: + ieee802154_le64_to_be64(eui64, &addr->extended_addr); + iphc_uncompress_eui64_lladdr(ipaddr, eui64); + break; + case IEEE802154_ADDR_SHORT: + /* fe:80::ff:fe00:XXXX + * \__/ + * short_addr + * + * Universe/Local bit is zero. + */ + ipaddr->s6_addr[0] = 0xFE; + ipaddr->s6_addr[1] = 0x80; + ipaddr->s6_addr[11] = 0xFF; + ipaddr->s6_addr[12] = 0xFE; + ieee802154_le16_to_be16(&ipaddr->s6_addr16[7], + &addr->short_addr); + break; + default: + /* should never handled and filtered by 802154 6lowpan */ + WARN_ON_ONCE(1); + break; + } +} + /* Uncompress address function for source and * destination address(non-multicast). * - * address_mode is sam value or dam value. 
+ * address_mode is the masked value for sam or dam value */ -static int uncompress_addr(struct sk_buff *skb, - struct in6_addr *ipaddr, const u8 address_mode, - const u8 *lladdr, const u8 addr_type, - const u8 addr_len) +static int uncompress_addr(struct sk_buff *skb, const struct net_device *dev, + struct in6_addr *ipaddr, u8 address_mode, + const void *lladdr) { bool fail; switch (address_mode) { - case LOWPAN_IPHC_ADDR_00: + /* SAM and DAM are the same here */ + case LOWPAN_IPHC_DAM_00: /* for global link addresses */ fail = lowpan_fetch_skb(skb, ipaddr->s6_addr, 16); break; - case LOWPAN_IPHC_ADDR_01: + case LOWPAN_IPHC_SAM_01: + case LOWPAN_IPHC_DAM_01: /* fe:80::XXXX:XXXX:XXXX:XXXX */ ipaddr->s6_addr[0] = 0xFE; ipaddr->s6_addr[1] = 0x80; fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[8], 8); break; - case LOWPAN_IPHC_ADDR_02: + case LOWPAN_IPHC_SAM_10: + case LOWPAN_IPHC_DAM_10: /* fe:80::ff:fe00:XXXX */ ipaddr->s6_addr[0] = 0xFE; ipaddr->s6_addr[1] = 0x80; @@ -86,38 +228,16 @@ static int uncompress_addr(struct sk_buff *skb, ipaddr->s6_addr[12] = 0xFE; fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[14], 2); break; - case LOWPAN_IPHC_ADDR_03: + case LOWPAN_IPHC_SAM_11: + case LOWPAN_IPHC_DAM_11: fail = false; - switch (addr_type) { - case IEEE802154_ADDR_LONG: - /* fe:80::XXXX:XXXX:XXXX:XXXX - * \_________________/ - * hwaddr - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - memcpy(&ipaddr->s6_addr[8], lladdr, addr_len); - /* second bit-flip (Universe/Local) - * is done according RFC2464 - */ - ipaddr->s6_addr[8] ^= 0x02; - break; - case IEEE802154_ADDR_SHORT: - /* fe:80::ff:fe00:XXXX - * \__/ - * short_addr - * - * Universe/Local bit is zero. - */ - ipaddr->s6_addr[0] = 0xFE; - ipaddr->s6_addr[1] = 0x80; - ipaddr->s6_addr[11] = 0xFF; - ipaddr->s6_addr[12] = 0xFE; - ipaddr->s6_addr16[7] = htons(*((u16 *)lladdr)); + switch (lowpan_priv(dev)->lltype) { + case LOWPAN_LLTYPE_IEEE802154: + iphc_uncompress_802154_lladdr(ipaddr, lladdr); break; default: - pr_debug("Invalid addr_type set\n"); - return -EINVAL; + iphc_uncompress_eui64_lladdr(ipaddr, lladdr); + break; } break; default: @@ -141,24 +261,25 @@ static int uncompress_addr(struct sk_buff *skb, */ static int uncompress_context_based_src_addr(struct sk_buff *skb, struct in6_addr *ipaddr, - const u8 sam) + u8 address_mode) { - switch (sam) { - case LOWPAN_IPHC_ADDR_00: + switch (address_mode) { + case LOWPAN_IPHC_SAM_00: /* unspec address :: * Do nothing, address is already :: */ break; - case LOWPAN_IPHC_ADDR_01: + case LOWPAN_IPHC_SAM_01: /* TODO */ - case LOWPAN_IPHC_ADDR_02: + case LOWPAN_IPHC_SAM_10: /* TODO */ - case LOWPAN_IPHC_ADDR_03: + case LOWPAN_IPHC_SAM_11: /* TODO */ - netdev_warn(skb->dev, "SAM value 0x%x not supported\n", sam); + netdev_warn(skb->dev, "SAM value 0x%x not supported\n", + address_mode); return -EINVAL; default: - pr_debug("Invalid sam value: 0x%x\n", sam); + pr_debug("Invalid sam value: 0x%x\n", address_mode); return -EINVAL; } @@ -174,11 +295,11 @@ static int uncompress_context_based_src_addr(struct sk_buff *skb, */ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, struct in6_addr *ipaddr, - const u8 dam) + u8 address_mode) { bool fail; - switch (dam) { + switch (address_mode) { case LOWPAN_IPHC_DAM_00: /* 00: 128 bits. The full address * is carried in-line. 
@@ -210,7 +331,7 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, fail = lowpan_fetch_skb(skb, &ipaddr->s6_addr[15], 1); break; default: - pr_debug("DAM value has a wrong value: 0x%x\n", dam); + pr_debug("DAM value has a wrong value: 0x%x\n", address_mode); return -EINVAL; } @@ -225,77 +346,142 @@ static int lowpan_uncompress_multicast_daddr(struct sk_buff *skb, return 0; } -/* TTL uncompression values */ -static const u8 lowpan_ttl_values[] = { 0, 1, 64, 255 }; - -int -lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, - const u8 *saddr, const u8 saddr_type, - const u8 saddr_len, const u8 *daddr, - const u8 daddr_type, const u8 daddr_len, - u8 iphc0, u8 iphc1) +/* get the ecn values from iphc tf format and set it to ipv6hdr */ +static inline void lowpan_iphc_tf_set_ecn(struct ipv6hdr *hdr, const u8 *tf) { - struct ipv6hdr hdr = {}; - u8 tmp, num_context = 0; - int err; + /* get the two higher bits which is ecn */ + u8 ecn = tf[0] & 0xc0; - raw_dump_table(__func__, "raw skb data dump uncompressed", - skb->data, skb->len); + /* ECN takes 0x30 in hdr->flow_lbl[0] */ + hdr->flow_lbl[0] |= (ecn >> 2); +} - /* another if the CID flag is set */ - if (iphc1 & LOWPAN_IPHC_CID) { - pr_debug("CID flag is set, increase header with one\n"); - if (lowpan_fetch_skb(skb, &num_context, sizeof(num_context))) - return -EINVAL; - } +/* get the dscp values from iphc tf format and set it to ipv6hdr */ +static inline void lowpan_iphc_tf_set_dscp(struct ipv6hdr *hdr, const u8 *tf) +{ + /* DSCP is at place after ECN */ + u8 dscp = tf[0] & 0x3f; - hdr.version = 6; + /* The four highest bits need to be set at hdr->priority */ + hdr->priority |= ((dscp & 0x3c) >> 2); + /* The two lower bits is part of hdr->flow_lbl[0] */ + hdr->flow_lbl[0] |= ((dscp & 0x03) << 6); +} - /* Traffic Class and Flow Label */ - switch ((iphc0 & LOWPAN_IPHC_TF) >> 3) { - /* Traffic Class and FLow Label carried in-line - * ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) +/* get the flow label values from iphc tf format and set it to ipv6hdr */ +static inline void lowpan_iphc_tf_set_lbl(struct ipv6hdr *hdr, const u8 *lbl) +{ + /* flow label is always some array started with lower nibble of + * flow_lbl[0] and followed with two bytes afterwards. Inside inline + * data the flow_lbl position can be different, which will be handled + * by lbl pointer. E.g. case "01" vs "00" the traffic class is 8 bit + * shifted, the different lbl pointer will handle that. + * + * The flow label will started at lower nibble of flow_lbl[0], the + * higher nibbles are part of DSCP + ECN. */ - case 0: /* 00b */ - if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) + hdr->flow_lbl[0] |= lbl[0] & 0x0f; + memcpy(&hdr->flow_lbl[1], &lbl[1], 2); +} + +/* lowpan_iphc_tf_decompress - decompress the traffic class. + * This function will return zero on success, a value lower than zero if + * failed. 
+ */ +static int lowpan_iphc_tf_decompress(struct sk_buff *skb, struct ipv6hdr *hdr, + u8 val) +{ + u8 tf[4]; + + /* Traffic Class and Flow Label */ + switch (val) { + case LOWPAN_IPHC_TF_00: + /* ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) */ + if (lowpan_fetch_skb(skb, tf, 4)) return -EINVAL; - memcpy(&hdr.flow_lbl, &skb->data[0], 3); - skb_pull(skb, 3); - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp >> 2) & 0x30) | (tmp << 6) | - (hdr.flow_lbl[0] & 0x0f); + /* 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN| DSCP | rsv | Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + lowpan_iphc_tf_set_ecn(hdr, tf); + lowpan_iphc_tf_set_dscp(hdr, tf); + lowpan_iphc_tf_set_lbl(hdr, &tf[1]); break; - /* Traffic class carried in-line - * ECN + DSCP (1 byte), Flow Label is elided - */ - case 2: /* 10b */ - if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) + case LOWPAN_IPHC_TF_01: + /* ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided. */ + if (lowpan_fetch_skb(skb, tf, 3)) return -EINVAL; - hdr.priority = ((tmp >> 2) & 0x0f); - hdr.flow_lbl[0] = ((tmp << 6) & 0xC0) | ((tmp >> 2) & 0x30); + /* 1 2 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN|rsv| Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + lowpan_iphc_tf_set_ecn(hdr, tf); + lowpan_iphc_tf_set_lbl(hdr, &tf[0]); break; - /* Flow Label carried in-line - * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided - */ - case 1: /* 01b */ - if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp))) + case LOWPAN_IPHC_TF_10: + /* ECN + DSCP (1 byte), Flow Label is elided. */ + if (lowpan_fetch_skb(skb, tf, 1)) return -EINVAL; - hdr.flow_lbl[0] = (tmp & 0x0F) | ((tmp >> 2) & 0x30); - memcpy(&hdr.flow_lbl[1], &skb->data[0], 2); - skb_pull(skb, 2); + /* 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |ECN| DSCP | + * +-+-+-+-+-+-+-+-+ + */ + lowpan_iphc_tf_set_ecn(hdr, tf); + lowpan_iphc_tf_set_dscp(hdr, tf); break; - /* Traffic Class and Flow Label are elided */ - case 3: /* 11b */ + case LOWPAN_IPHC_TF_11: + /* Traffic Class and Flow Label are elided */ break; default: - break; + WARN_ON_ONCE(1); + return -EINVAL; } + return 0; +} + +/* TTL uncompression values */ +static const u8 lowpan_ttl_values[] = { + [LOWPAN_IPHC_HLIM_01] = 1, + [LOWPAN_IPHC_HLIM_10] = 64, + [LOWPAN_IPHC_HLIM_11] = 255, +}; + +int lowpan_header_decompress(struct sk_buff *skb, const struct net_device *dev, + const void *daddr, const void *saddr) +{ + struct ipv6hdr hdr = {}; + u8 iphc0, iphc1; + int err; + + raw_dump_table(__func__, "raw skb data dump uncompressed", + skb->data, skb->len); + + if (lowpan_fetch_skb(skb, &iphc0, sizeof(iphc0)) || + lowpan_fetch_skb(skb, &iphc1, sizeof(iphc1))) + return -EINVAL; + + /* another if the CID flag is set */ + if (iphc1 & LOWPAN_IPHC_CID) + return -ENOTSUPP; + + hdr.version = 6; + + err = lowpan_iphc_tf_decompress(skb, &hdr, + iphc0 & LOWPAN_IPHC_TF_MASK); + if (err < 0) + return err; + /* Next Header */ - if ((iphc0 & LOWPAN_IPHC_NH_C) == 0) { + if (!(iphc0 & LOWPAN_IPHC_NH)) { /* Next header is carried inline */ if (lowpan_fetch_skb(skb, &hdr.nexthdr, sizeof(hdr.nexthdr))) return -EINVAL; @@ -305,35 +491,30 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, } /* Hop Limit */ - if ((iphc0 & 0x03) != LOWPAN_IPHC_TTL_I) { - hdr.hop_limit = lowpan_ttl_values[iphc0 & 0x03]; + if ((iphc0 & 
LOWPAN_IPHC_HLIM_MASK) != LOWPAN_IPHC_HLIM_00) { + hdr.hop_limit = lowpan_ttl_values[iphc0 & LOWPAN_IPHC_HLIM_MASK]; } else { if (lowpan_fetch_skb(skb, &hdr.hop_limit, sizeof(hdr.hop_limit))) return -EINVAL; } - /* Extract SAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_SAM) >> LOWPAN_IPHC_SAM_BIT) & 0x03; - if (iphc1 & LOWPAN_IPHC_SAC) { /* Source address context based uncompression */ pr_debug("SAC bit is set. Handle context based source address.\n"); - err = uncompress_context_based_src_addr(skb, &hdr.saddr, tmp); + err = uncompress_context_based_src_addr(skb, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK); } else { /* Source address uncompression */ pr_debug("source address stateless compression\n"); - err = uncompress_addr(skb, &hdr.saddr, tmp, saddr, - saddr_type, saddr_len); + err = uncompress_addr(skb, dev, &hdr.saddr, + iphc1 & LOWPAN_IPHC_SAM_MASK, saddr); } /* Check on error of previous branch */ if (err) return -EINVAL; - /* Extract DAM to the tmp variable */ - tmp = ((iphc1 & LOWPAN_IPHC_DAM_11) >> LOWPAN_IPHC_DAM_BIT) & 0x03; - /* check for Multicast Compression */ if (iphc1 & LOWPAN_IPHC_M) { if (iphc1 & LOWPAN_IPHC_DAC) { @@ -341,22 +522,22 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, /* TODO: implement this */ } else { err = lowpan_uncompress_multicast_daddr(skb, &hdr.daddr, - tmp); + iphc1 & LOWPAN_IPHC_DAM_MASK); if (err) return -EINVAL; } } else { - err = uncompress_addr(skb, &hdr.daddr, tmp, daddr, - daddr_type, daddr_len); + err = uncompress_addr(skb, dev, &hdr.daddr, + iphc1 & LOWPAN_IPHC_DAM_MASK, daddr); pr_debug("dest: stateless compression mode %d dest %pI6c\n", - tmp, &hdr.daddr); + iphc1 & LOWPAN_IPHC_DAM_MASK, &hdr.daddr); if (err) return -EINVAL; } /* Next header data uncompression */ - if (iphc0 & LOWPAN_IPHC_NH_C) { + if (iphc0 & LOWPAN_IPHC_NH) { err = lowpan_nhc_do_uncompression(skb, dev, &hdr); if (err < 0) return err; @@ -366,7 +547,18 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, return err; } - hdr.payload_len = htons(skb->len); + switch (lowpan_priv(dev)->lltype) { + case LOWPAN_LLTYPE_IEEE802154: + if (lowpan_802154_cb(skb)->d_size) + hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size - + sizeof(struct ipv6hdr)); + else + hdr.payload_len = htons(skb->len); + break; + default: + hdr.payload_len = htons(skb->len); + break; + } pr_debug("skb headroom size = %d, data length = %d\n", skb_headroom(skb), skb->len); @@ -386,42 +578,176 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev, } EXPORT_SYMBOL_GPL(lowpan_header_decompress); -static u8 lowpan_compress_addr_64(u8 **hc_ptr, u8 shift, - const struct in6_addr *ipaddr, - const unsigned char *lladdr) +static const u8 lowpan_iphc_dam_to_sam_value[] = { + [LOWPAN_IPHC_DAM_00] = LOWPAN_IPHC_SAM_00, + [LOWPAN_IPHC_DAM_01] = LOWPAN_IPHC_SAM_01, + [LOWPAN_IPHC_DAM_10] = LOWPAN_IPHC_SAM_10, + [LOWPAN_IPHC_DAM_11] = LOWPAN_IPHC_SAM_11, +}; + +static u8 lowpan_compress_addr_64(u8 **hc_ptr, const struct in6_addr *ipaddr, + const unsigned char *lladdr, bool sam) { - u8 val = 0; + u8 dam = LOWPAN_IPHC_DAM_00; if (is_addr_mac_addr_based(ipaddr, lladdr)) { - val = 3; /* 0-bits */ + dam = LOWPAN_IPHC_DAM_11; /* 0-bits */ pr_debug("address compression 0 bits\n"); } else if (lowpan_is_iid_16_bit_compressable(ipaddr)) { /* compress IID to 16 bits xxxx::XXXX */ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[7], 2); - val = 2; /* 16-bits */ + dam = LOWPAN_IPHC_DAM_10; /* 16-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (16 bits)", 
*hc_ptr - 2, 2); } else { /* do not compress IID => xxxx::IID */ lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr16[4], 8); - val = 1; /* 64-bits */ + dam = LOWPAN_IPHC_DAM_01; /* 64-bits */ raw_dump_inline(NULL, "Compressed ipv6 addr is (64 bits)", *hc_ptr - 8, 8); } - return rol8(val, shift); + if (sam) + return lowpan_iphc_dam_to_sam_value[dam]; + else + return dam; +} + +/* lowpan_iphc_get_tc - get the ECN + DCSP fields in hc format */ +static inline u8 lowpan_iphc_get_tc(const struct ipv6hdr *hdr) +{ + u8 dscp, ecn; + + /* hdr->priority contains the higher bits of dscp, lower are part of + * flow_lbl[0]. Note ECN, DCSP is swapped in ipv6 hdr. + */ + dscp = (hdr->priority << 2) | ((hdr->flow_lbl[0] & 0xc0) >> 6); + /* ECN is at the two lower bits from first nibble of flow_lbl[0] */ + ecn = (hdr->flow_lbl[0] & 0x30); + /* for pretty debug output, also shift ecn to get the ecn value */ + pr_debug("ecn 0x%02x dscp 0x%02x\n", ecn >> 4, dscp); + /* ECN is at 0x30 now, shift it to have ECN + DCSP */ + return (ecn << 2) | dscp; } -int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, - unsigned short type, const void *_daddr, - const void *_saddr, unsigned int len) +/* lowpan_iphc_is_flow_lbl_zero - check if flow label is zero */ +static inline bool lowpan_iphc_is_flow_lbl_zero(const struct ipv6hdr *hdr) { - u8 tmp, iphc0, iphc1, *hc_ptr; + return ((!(hdr->flow_lbl[0] & 0x0f)) && + !hdr->flow_lbl[1] && !hdr->flow_lbl[2]); +} + +/* lowpan_iphc_tf_compress - compress the traffic class which is set by + * ipv6hdr. Return the corresponding format identifier which is used. + */ +static u8 lowpan_iphc_tf_compress(u8 **hc_ptr, const struct ipv6hdr *hdr) +{ + /* get ecn dscp data in a byteformat as: ECN(hi) + DSCP(lo) */ + u8 tc = lowpan_iphc_get_tc(hdr), tf[4], val; + + /* printout the traffic class in hc format */ + pr_debug("tc 0x%02x\n", tc); + + if (lowpan_iphc_is_flow_lbl_zero(hdr)) { + if (!tc) { + /* 11: Traffic Class and Flow Label are elided. */ + val = LOWPAN_IPHC_TF_11; + } else { + /* 10: ECN + DSCP (1 byte), Flow Label is elided. + * + * 0 1 2 3 4 5 6 7 + * +-+-+-+-+-+-+-+-+ + * |ECN| DSCP | + * +-+-+-+-+-+-+-+-+ + */ + lowpan_push_hc_data(hc_ptr, &tc, sizeof(tc)); + val = LOWPAN_IPHC_TF_10; + } + } else { + /* check if dscp is zero, it's after the first two bit */ + if (!(tc & 0x3f)) { + /* 01: ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided + * + * 1 2 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN|rsv| Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + memcpy(&tf[0], &hdr->flow_lbl[0], 3); + /* zero the highest 4-bits, contains DCSP + ECN */ + tf[0] &= ~0xf0; + /* set ECN */ + tf[0] |= (tc & 0xc0); + + lowpan_push_hc_data(hc_ptr, tf, 3); + val = LOWPAN_IPHC_TF_01; + } else { + /* 00: ECN + DSCP + 4-bit Pad + Flow Label (4 bytes) + * + * 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |ECN| DSCP | rsv | Flow Label | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + memcpy(&tf[0], &tc, sizeof(tc)); + /* highest nibble of flow_lbl[0] is part of DSCP + ECN + * which will be the 4-bit pad and will be filled with + * zeros afterwards. 
+ */ + memcpy(&tf[1], &hdr->flow_lbl[0], 3); + /* zero the 4-bit pad, which is reserved */ + tf[1] &= ~0xf0; + + lowpan_push_hc_data(hc_ptr, tf, 4); + val = LOWPAN_IPHC_TF_00; + } + } + + return val; +} + +static u8 lowpan_iphc_mcast_addr_compress(u8 **hc_ptr, + const struct in6_addr *ipaddr) +{ + u8 val; + + if (lowpan_is_mcast_addr_compressable8(ipaddr)) { + pr_debug("compressed to 1 octet\n"); + /* use last byte */ + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[15], 1); + val = LOWPAN_IPHC_DAM_11; + } else if (lowpan_is_mcast_addr_compressable32(ipaddr)) { + pr_debug("compressed to 4 octets\n"); + /* second byte + the last three */ + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1); + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[13], 3); + val = LOWPAN_IPHC_DAM_10; + } else if (lowpan_is_mcast_addr_compressable48(ipaddr)) { + pr_debug("compressed to 6 octets\n"); + /* second byte + the last five */ + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[1], 1); + lowpan_push_hc_data(hc_ptr, &ipaddr->s6_addr[11], 5); + val = LOWPAN_IPHC_DAM_01; + } else { + pr_debug("using full address\n"); + lowpan_push_hc_data(hc_ptr, ipaddr->s6_addr, 16); + val = LOWPAN_IPHC_DAM_00; + } + + return val; +} + +int lowpan_header_compress(struct sk_buff *skb, const struct net_device *dev, + const void *daddr, const void *saddr) +{ + u8 iphc0, iphc1, *hc_ptr; struct ipv6hdr *hdr; - u8 head[100] = {}; + u8 head[LOWPAN_IPHC_MAX_HC_BUF_LEN] = {}; int ret, addr_type; - if (type != ETH_P_IPV6) + if (skb->protocol != htons(ETH_P_IPV6)) return -EINVAL; hdr = ipv6_hdr(skb); @@ -445,63 +771,26 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, /* TODO: context lookup */ - raw_dump_inline(__func__, "saddr", - (unsigned char *)_saddr, IEEE802154_ADDR_LEN); - raw_dump_inline(__func__, "daddr", - (unsigned char *)_daddr, IEEE802154_ADDR_LEN); + raw_dump_inline(__func__, "saddr", saddr, EUI64_ADDR_LEN); + raw_dump_inline(__func__, "daddr", daddr, EUI64_ADDR_LEN); raw_dump_table(__func__, "sending raw skb network uncompressed packet", skb->data, skb->len); - /* Traffic class, flow label - * If flow label is 0, compress it. If traffic class is 0, compress it - * We have to process both in the same time as the offset of traffic - * class depends on the presence of version and flow label - */ - - /* hc format of TC is ECN | DSCP , original one is DSCP | ECN */ - tmp = (hdr->priority << 4) | (hdr->flow_lbl[0] >> 4); - tmp = ((tmp & 0x03) << 6) | (tmp >> 2); - - if (((hdr->flow_lbl[0] & 0x0F) == 0) && - (hdr->flow_lbl[1] == 0) && (hdr->flow_lbl[2] == 0)) { - /* flow label can be compressed */ - iphc0 |= LOWPAN_IPHC_FL_C; - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress (elide) all */ - iphc0 |= LOWPAN_IPHC_TC_C; - } else { - /* compress only the flow label */ - *hc_ptr = tmp; - hc_ptr += 1; - } - } else { - /* Flow label cannot be compressed */ - if ((hdr->priority == 0) && - ((hdr->flow_lbl[0] & 0xF0) == 0)) { - /* compress only traffic class */ - iphc0 |= LOWPAN_IPHC_TC_C; - *hc_ptr = (tmp & 0xc0) | (hdr->flow_lbl[0] & 0x0F); - memcpy(hc_ptr + 1, &hdr->flow_lbl[1], 2); - hc_ptr += 3; - } else { - /* compress nothing */ - memcpy(hc_ptr, hdr, 4); - /* replace the top byte with new ECN | DSCP format */ - *hc_ptr = tmp; - hc_ptr += 4; - } - } + /* Traffic Class, Flow Label compression */ + iphc0 |= lowpan_iphc_tf_compress(&hc_ptr, hdr); /* NOTE: payload length is always compressed */ /* Check if we provide the nhc format for nexthdr and compression * functionality. 
If not nexthdr is handled inline and not compressed. */ - ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr, &iphc0); - if (ret < 0) - return ret; + ret = lowpan_nhc_check_compression(skb, hdr, &hc_ptr); + if (ret == -ENOENT) + lowpan_push_hc_data(&hc_ptr, &hdr->nexthdr, + sizeof(hdr->nexthdr)); + else + iphc0 |= LOWPAN_IPHC_NH; /* Hop limit * if 1: compress, encoding is 01 @@ -511,13 +800,13 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, */ switch (hdr->hop_limit) { case 1: - iphc0 |= LOWPAN_IPHC_TTL_1; + iphc0 |= LOWPAN_IPHC_HLIM_01; break; case 64: - iphc0 |= LOWPAN_IPHC_TTL_64; + iphc0 |= LOWPAN_IPHC_HLIM_10; break; case 255: - iphc0 |= LOWPAN_IPHC_TTL_255; + iphc0 |= LOWPAN_IPHC_HLIM_11; break; default: lowpan_push_hc_data(&hc_ptr, &hdr->hop_limit, @@ -531,9 +820,8 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, iphc1 |= LOWPAN_IPHC_SAC; } else { if (addr_type & IPV6_ADDR_LINKLOCAL) { - iphc1 |= lowpan_compress_addr_64(&hc_ptr, - LOWPAN_IPHC_SAM_BIT, - &hdr->saddr, _saddr); + iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->saddr, + saddr, true); pr_debug("source address unicast link-local %pI6c iphc1 0x%02x\n", &hdr->saddr, iphc1); } else { @@ -547,38 +835,12 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, if (addr_type & IPV6_ADDR_MULTICAST) { pr_debug("destination address is multicast: "); iphc1 |= LOWPAN_IPHC_M; - if (lowpan_is_mcast_addr_compressable8(&hdr->daddr)) { - pr_debug("compressed to 1 octet\n"); - iphc1 |= LOWPAN_IPHC_DAM_11; - /* use last byte */ - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[15], 1); - } else if (lowpan_is_mcast_addr_compressable32(&hdr->daddr)) { - pr_debug("compressed to 4 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_10; - /* second byte + the last three */ - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[1], 1); - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[13], 3); - } else if (lowpan_is_mcast_addr_compressable48(&hdr->daddr)) { - pr_debug("compressed to 6 octets\n"); - iphc1 |= LOWPAN_IPHC_DAM_01; - /* second byte + the last five */ - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[1], 1); - lowpan_push_hc_data(&hc_ptr, - &hdr->daddr.s6_addr[11], 5); - } else { - pr_debug("using full address\n"); - iphc1 |= LOWPAN_IPHC_DAM_00; - lowpan_push_hc_data(&hc_ptr, hdr->daddr.s6_addr, 16); - } + iphc1 |= lowpan_iphc_mcast_addr_compress(&hc_ptr, &hdr->daddr); } else { if (addr_type & IPV6_ADDR_LINKLOCAL) { /* TODO: context lookup */ - iphc1 |= lowpan_compress_addr_64(&hc_ptr, - LOWPAN_IPHC_DAM_BIT, &hdr->daddr, _daddr); + iphc1 |= lowpan_compress_addr_64(&hc_ptr, &hdr->daddr, + daddr, false); pr_debug("dest address unicast link-local %pI6c " "iphc1 0x%02x\n", &hdr->daddr, iphc1); } else { @@ -588,7 +850,7 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev, } /* next header compression */ - if (iphc0 & LOWPAN_IPHC_NH_C) { + if (iphc0 & LOWPAN_IPHC_NH) { ret = lowpan_nhc_do_compression(skb, hdr, &hc_ptr); if (ret < 0) return ret; diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c index fd20fc51a7c4..7008d53e455c 100644 --- a/net/6lowpan/nhc.c +++ b/net/6lowpan/nhc.c @@ -95,23 +95,20 @@ static struct lowpan_nhc *lowpan_nhc_by_nhcid(const struct sk_buff *skb) } int lowpan_nhc_check_compression(struct sk_buff *skb, - const struct ipv6hdr *hdr, u8 **hc_ptr, - u8 *iphc0) + const struct ipv6hdr *hdr, u8 **hc_ptr) { struct lowpan_nhc *nhc; + int ret = 0; spin_lock_bh(&lowpan_nhc_lock); nhc = lowpan_nexthdr_nhcs[hdr->nexthdr]; - if (nhc && 
nhc->compress) - *iphc0 |= LOWPAN_IPHC_NH_C; - else - lowpan_push_hc_data(hc_ptr, &hdr->nexthdr, - sizeof(hdr->nexthdr)); + if (!(nhc && nhc->compress)) + ret = -ENOENT; spin_unlock_bh(&lowpan_nhc_lock); - return 0; + return ret; } int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, @@ -157,7 +154,8 @@ out: return ret; } -int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, +int lowpan_nhc_do_uncompression(struct sk_buff *skb, + const struct net_device *dev, struct ipv6hdr *hdr) { struct lowpan_nhc *nhc; diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h index ed44938eb5de..803041400136 100644 --- a/net/6lowpan/nhc.h +++ b/net/6lowpan/nhc.h @@ -8,8 +8,6 @@ #include <net/6lowpan.h> #include <net/ipv6.h> -#define LOWPAN_NHC_MAX_ID_LEN 1 - /** * LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct * @@ -88,19 +86,16 @@ struct lowpan_nhc *lowpan_nhc_by_nexthdr(u8 nexthdr); /** * lowpan_nhc_check_compression - checks if we support compression format. If - * we support the nhc by nexthdr field, the 6LoWPAN iphc NHC bit will be - * set. If we don't support nexthdr will be added as inline data to the - * 6LoWPAN header. + * we support the nhc by nexthdr field, the function will return 0. If we + * don't support the nhc by nexthdr this function will return -ENOENT. * * @skb: skb of 6LoWPAN header to read nhc and replace header. * @hdr: ipv6hdr to check the nexthdr value * @hc_ptr: pointer for 6LoWPAN header which should increment at the end of * replaced header. - * @iphc0: iphc0 pointer to set the 6LoWPAN NHC bit */ int lowpan_nhc_check_compression(struct sk_buff *skb, - const struct ipv6hdr *hdr, u8 **hc_ptr, - u8 *iphc0); + const struct ipv6hdr *hdr, u8 **hc_ptr); /** * lowpan_nhc_do_compression - calling compress callback for nhc @@ -121,7 +116,8 @@ int lowpan_nhc_do_compression(struct sk_buff *skb, const struct ipv6hdr *hdr, * @dev: netdevice for print logging information. * @hdr: ipv6hdr for setting nexthdr value. 
*/ -int lowpan_nhc_do_uncompression(struct sk_buff *skb, struct net_device *dev, +int lowpan_nhc_do_uncompression(struct sk_buff *skb, + const struct net_device *dev, struct ipv6hdr *hdr); /** diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c index c6bcaeb428ae..69537a2eaab1 100644 --- a/net/6lowpan/nhc_udp.c +++ b/net/6lowpan/nhc_udp.c @@ -17,7 +17,27 @@ #include "nhc.h" -#define LOWPAN_NHC_UDP_IDLEN 1 +#define LOWPAN_NHC_UDP_MASK 0xF8 +#define LOWPAN_NHC_UDP_ID 0xF0 +#define LOWPAN_NHC_UDP_IDLEN 1 + +#define LOWPAN_NHC_UDP_4BIT_PORT 0xF0B0 +#define LOWPAN_NHC_UDP_4BIT_MASK 0xFFF0 +#define LOWPAN_NHC_UDP_8BIT_PORT 0xF000 +#define LOWPAN_NHC_UDP_8BIT_MASK 0xFF00 + +/* values for port compression, _with checksum_ ie bit 5 set to 0 */ + +/* all inline */ +#define LOWPAN_NHC_UDP_CS_P_00 0xF0 +/* source 16bit inline, dest = 0xF0 + 8 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_01 0xF1 +/* source = 0xF0 + 8bit inline, dest = 16 bit inline */ +#define LOWPAN_NHC_UDP_CS_P_10 0xF2 +/* source & dest = 0xF0B + 4bit inline */ +#define LOWPAN_NHC_UDP_CS_P_11 0xF3 +/* checksum elided */ +#define LOWPAN_NHC_UDP_CS_C 0x04 static int udp_uncompress(struct sk_buff *skb, size_t needed) { @@ -71,7 +91,18 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed) * here, we obtain the hint from the remaining size of the * frame */ - uh.len = htons(skb->len + sizeof(struct udphdr)); + switch (lowpan_priv(skb->dev)->lltype) { + case LOWPAN_LLTYPE_IEEE802154: + if (lowpan_802154_cb(skb)->d_size) + uh.len = htons(lowpan_802154_cb(skb)->d_size - + sizeof(struct ipv6hdr)); + else + uh.len = htons(skb->len + sizeof(struct udphdr)); + break; + default: + uh.len = htons(skb->len + sizeof(struct udphdr)); + break; + } pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len)); /* replace the compressed UDP head by the uncompressed UDP diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 61bf2a06e85d..496b27588493 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -206,7 +206,10 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid, return -ENOMEM; if (vlan_hw_filter_capable(dev, vid_info)) { - err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); + if (netif_device_present(dev)) + err = ops->ndo_vlan_rx_add_vid(dev, proto, vid); + else + err = -ENODEV; if (err) { kfree(vid_info); return err; @@ -264,7 +267,10 @@ static void __vlan_vid_del(struct vlan_info *vlan_info, int err; if (vlan_hw_filter_capable(dev, vid_info)) { - err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); + if (netif_device_present(dev)) + err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid); + else + err = -ENODEV; if (err) { pr_warn("failed to kill vid %04x/%d for device %s\n", proto, vid, dev->name); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ba1210253f5e..52b4a2f993f2 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, - IB_QPT_RC); + rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/Kconfig b/net/Kconfig index 7021c1bf44d6..127da94ae25e 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -232,6 +232,7 @@ source "net/netlink/Kconfig" source "net/mpls/Kconfig" source "net/hsr/Kconfig" source "net/switchdev/Kconfig" +source "net/l3mdev/Kconfig" config RPS bool diff 
--git a/net/Makefile b/net/Makefile index 3995613e5510..a5d04098dfce 100644 --- a/net/Makefile +++ b/net/Makefile @@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ endif +ifneq ($(CONFIG_NET_L3_MASTER_DEV),) +obj-y += l3mdev/ +endif diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 131e79cde350..9e9cca3689a0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -21,8 +21,6 @@ #include <net/ip6_route.h> #include <net/addrconf.h> -#include <net/af_ieee802154.h> /* to get the address type */ - #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> @@ -35,7 +33,6 @@ static struct dentry *lowpan_enable_debugfs; static struct dentry *lowpan_control_debugfs; #define IFACE_NAME_TEMPLATE "bt%d" -#define EUI64_ADDR_LEN 8 struct skb_cb { struct in6_addr addr; @@ -266,14 +263,13 @@ static int give_skb_to_upper(struct sk_buff *skb, struct net_device *dev) if (!skb_cp) return NET_RX_DROP; - return netif_rx(skb_cp); + return netif_rx_ni(skb_cp); } static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct l2cap_chan *chan) { const u8 *saddr, *daddr; - u8 iphc0, iphc1; struct lowpan_dev *dev; struct lowpan_peer *peer; @@ -288,22 +284,7 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, saddr = peer->eui64_addr; daddr = dev->netdev->dev_addr; - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; - - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; - - return lowpan_header_decompress(skb, netdev, - saddr, IEEE802154_ADDR_LONG, - EUI64_ADDR_LEN, daddr, - IEEE802154_ADDR_LONG, EUI64_ADDR_LEN, - iphc0, iphc1); - + return lowpan_header_decompress(skb, netdev, daddr, saddr); } static int recv_pkt(struct sk_buff *skb, struct net_device *dev, @@ -315,15 +296,17 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, if (!netif_running(dev)) goto drop; - if (dev->type != ARPHRD_6LOWPAN) + if (dev->type != ARPHRD_6LOWPAN || !skb->len) goto drop; + skb_reset_network_header(skb); + skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto drop; /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { + if (lowpan_is_ipv6(*skb_network_header(skb))) { /* Copy the packet so that the IPv6 header is * properly aligned. 
*/ @@ -335,7 +318,6 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->protocol = htons(ETH_P_IPV6); local_skb->pkt_type = PACKET_HOST; - skb_reset_network_header(local_skb); skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { @@ -348,38 +330,34 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, consume_skb(local_skb); consume_skb(skb); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - local_skb = skb_clone(skb, GFP_ATOMIC); - if (!local_skb) - goto drop; + } else if (lowpan_is_iphc(*skb_network_header(skb))) { + local_skb = skb_clone(skb, GFP_ATOMIC); + if (!local_skb) + goto drop; - ret = iphc_decompress(local_skb, dev, chan); - if (ret < 0) { - kfree_skb(local_skb); - goto drop; - } + ret = iphc_decompress(local_skb, dev, chan); + if (ret < 0) { + kfree_skb(local_skb); + goto drop; + } - local_skb->protocol = htons(ETH_P_IPV6); - local_skb->pkt_type = PACKET_HOST; - local_skb->dev = dev; + local_skb->protocol = htons(ETH_P_IPV6); + local_skb->pkt_type = PACKET_HOST; + local_skb->dev = dev; - if (give_skb_to_upper(local_skb, dev) - != NET_RX_SUCCESS) { - kfree_skb(local_skb); - goto drop; - } + if (give_skb_to_upper(local_skb, dev) + != NET_RX_SUCCESS) { + kfree_skb(local_skb); + goto drop; + } - dev->stats.rx_bytes += skb->len; - dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; - consume_skb(local_skb); - consume_skb(skb); - break; - default: - break; - } + consume_skb(local_skb); + consume_skb(skb); + } else { + goto drop; } return NET_RX_SUCCESS; @@ -493,8 +471,7 @@ static int setup_header(struct sk_buff *skb, struct net_device *netdev, status = 1; } - lowpan_header_compress(skb, netdev, ETH_P_IPV6, daddr, - dev->netdev->dev_addr, skb->len); + lowpan_header_compress(skb, netdev, daddr, dev->netdev->dev_addr); err = dev_hard_header(skb, netdev, ETH_P_IPV6, NULL, NULL, 0); if (err < 0) @@ -674,13 +651,8 @@ static struct header_ops header_ops = { static void netdev_setup(struct net_device *dev) { - dev->addr_len = EUI64_ADDR_LEN; - dev->type = ARPHRD_6LOWPAN; - dev->hard_header_len = 0; dev->needed_tailroom = 0; - dev->mtu = IPV6_MIN_MTU; - dev->tx_queue_len = 0; dev->flags = IFF_RUNNING | IFF_POINTOPOINT | IFF_MULTICAST; dev->watchdog_timeo = 0; @@ -775,24 +747,7 @@ static struct l2cap_chan *chan_create(void) chan->chan_type = L2CAP_CHAN_CONN_ORIENTED; chan->mode = L2CAP_MODE_LE_FLOWCTL; - chan->omtu = 65535; - chan->imtu = chan->omtu; - - return chan; -} - -static struct l2cap_chan *chan_open(struct l2cap_chan *pchan) -{ - struct l2cap_chan *chan; - - chan = chan_create(); - if (!chan) - return NULL; - - chan->remote_mps = chan->omtu; - chan->mps = chan->omtu; - - chan->state = BT_CONNECTED; + chan->imtu = 1280; return chan; } @@ -919,7 +874,10 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan) { struct l2cap_chan *chan; - chan = chan_open(pchan); + chan = chan_create(); + if (!chan) + return NULL; + chan->ops = pchan->ops; BT_DBG("chan %p pchan %p", chan, pchan); @@ -1065,34 +1023,23 @@ static inline __u8 bdaddr_type(__u8 type) return BDADDR_LE_RANDOM; } -static struct l2cap_chan *chan_get(void) -{ - struct l2cap_chan *pchan; - - pchan = chan_create(); - if (!pchan) - return NULL; - - pchan->ops = &bt_6lowpan_chan_ops; - - return pchan; -} - static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type) { - struct l2cap_chan *pchan; + struct l2cap_chan *chan; int err; 
- pchan = chan_get(); - if (!pchan) + chan = chan_create(); + if (!chan) return -EINVAL; - err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0, + chan->ops = &bt_6lowpan_chan_ops; + + err = l2cap_chan_connect(chan, cpu_to_le16(L2CAP_PSM_IPSP), 0, addr, dst_type); - BT_DBG("chan %p err %d", pchan, err); + BT_DBG("chan %p err %d", chan, err); if (err < 0) - l2cap_chan_put(pchan); + l2cap_chan_put(chan); return err; } @@ -1117,31 +1064,32 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type) static struct l2cap_chan *bt_6lowpan_listen(void) { bdaddr_t *addr = BDADDR_ANY; - struct l2cap_chan *pchan; + struct l2cap_chan *chan; int err; if (!enable_6lowpan) return NULL; - pchan = chan_get(); - if (!pchan) + chan = chan_create(); + if (!chan) return NULL; - pchan->state = BT_LISTEN; - pchan->src_type = BDADDR_LE_PUBLIC; + chan->ops = &bt_6lowpan_chan_ops; + chan->state = BT_LISTEN; + chan->src_type = BDADDR_LE_PUBLIC; - atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT); + atomic_set(&chan->nesting, L2CAP_NESTING_PARENT); - BT_DBG("chan %p src type %d", pchan, pchan->src_type); + BT_DBG("chan %p src type %d", chan, chan->src_type); - err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); + err = l2cap_add_psm(chan, addr, cpu_to_le16(L2CAP_PSM_IPSP)); if (err) { - l2cap_chan_put(pchan); + l2cap_chan_put(chan); BT_ERR("psm cannot be added err %d", err); return NULL; } - return pchan; + return chan; } static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, @@ -1165,7 +1113,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, return -ENOENT; hci_dev_lock(hdev); - hcon = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); if (!hcon) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 70f9d945faf7..a3bffd1ec2b4 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -33,7 +33,7 @@ #include "selftest.h" -#define VERSION "2.20" +#define VERSION "2.21" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 @@ -221,7 +221,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, BT_DBG("sock %p sk %p len %zu", sock, sk, len); - if (flags & (MSG_OOB)) + if (flags & MSG_OOB) return -EOPNOTSUPP; skb = skb_recv_datagram(sk, flags, noblock, &err); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2dda439c8cb8..85b82f7adbd2 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -59,15 +59,11 @@ static const struct sco_param esco_param_msbc[] = { { EDR_ESCO_MASK | ESCO_EV3, 0x0008, 0x02 }, /* T1 */ }; -static void hci_le_create_connection_cancel(struct hci_conn *conn) -{ - hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); -} - /* This function requires the caller holds hdev->lock */ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) { struct hci_conn_params *params; + struct hci_dev *hdev = conn->hdev; struct smp_irk *irk; bdaddr_t *bdaddr; u8 bdaddr_type; @@ -76,14 +72,15 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) bdaddr_type = conn->dst_type; /* Check if we need to convert to identity address */ - irk = hci_get_irk(conn->hdev, bdaddr, bdaddr_type); + irk = hci_get_irk(hdev, bdaddr, bdaddr_type); if (irk) { bdaddr = &irk->bdaddr; bdaddr_type = irk->addr_type; } - params = hci_explicit_connect_lookup(conn->hdev, bdaddr, bdaddr_type); - if (!params) + params = hci_pend_le_action_lookup(&hdev->pend_le_conns, bdaddr, + 
bdaddr_type); + if (!params || !params->explicit_connect) return; /* The connection attempt was doing scan for new RPA, and is @@ -97,21 +94,21 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn) switch (params->auto_connect) { case HCI_AUTO_CONN_EXPLICIT: - hci_conn_params_del(conn->hdev, bdaddr, bdaddr_type); + hci_conn_params_del(hdev, bdaddr, bdaddr_type); /* return instead of break to avoid duplicate scan update */ return; case HCI_AUTO_CONN_DIRECT: case HCI_AUTO_CONN_ALWAYS: - list_add(¶ms->action, &conn->hdev->pend_le_conns); + list_add(¶ms->action, &hdev->pend_le_conns); break; case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &conn->hdev->pend_le_reports); + list_add(¶ms->action, &hdev->pend_le_reports); break; default: break; } - hci_update_background_scan(conn->hdev); + hci_update_background_scan(hdev); } static void hci_conn_cleanup(struct hci_conn *conn) @@ -137,18 +134,51 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_conn_put(conn); } -/* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_remove(struct hci_conn *conn) +static void le_scan_cleanup(struct work_struct *work) { - hci_connect_le_scan_cleanup(conn); + struct hci_conn *conn = container_of(work, struct hci_conn, + le_scan_cleanup); + struct hci_dev *hdev = conn->hdev; + struct hci_conn *c = NULL; - /* We can't call hci_conn_del here since that would deadlock - * with trying to call cancel_delayed_work_sync(&conn->disc_work). - * Instead, call just hci_conn_cleanup() which contains the bare - * minimum cleanup operations needed for a connection in this - * state. + BT_DBG("%s hcon %p", hdev->name, conn); + + hci_dev_lock(hdev); + + /* Check that the hci_conn is still around */ + rcu_read_lock(); + list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { + if (c == conn) + break; + } + rcu_read_unlock(); + + if (c == conn) { + hci_connect_le_scan_cleanup(conn); + hci_conn_cleanup(conn); + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + hci_conn_put(conn); +} + +static void hci_connect_le_scan_remove(struct hci_conn *conn) +{ + BT_DBG("%s hcon %p", conn->hdev->name, conn); + + /* We can't call hci_conn_del/hci_conn_cleanup here since that + * could deadlock with another hci_conn_del() call that's holding + * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). + * Instead, grab temporary extra references to the hci_dev and + * hci_conn and perform the necessary cleanup in a separate work + * callback. 
*/ - hci_conn_cleanup(conn); + + hci_dev_hold(conn->hdev); + hci_conn_get(conn); + + schedule_work(&conn->le_scan_cleanup); } static void hci_acl_create_connection(struct hci_conn *conn) @@ -194,33 +224,8 @@ static void hci_acl_create_connection(struct hci_conn *conn) hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); } -static void hci_acl_create_connection_cancel(struct hci_conn *conn) -{ - struct hci_cp_create_conn_cancel cp; - - BT_DBG("hcon %p", conn); - - if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - bacpy(&cp.bdaddr, &conn->dst); - hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp); -} - -static void hci_reject_sco(struct hci_conn *conn) -{ - struct hci_cp_reject_sync_conn_req cp; - - cp.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; - bacpy(&cp.bdaddr, &conn->dst); - - hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp); -} - int hci_disconnect(struct hci_conn *conn, __u8 reason) { - struct hci_cp_disconnect cp; - BT_DBG("hcon %p", conn); /* When we are master of an established connection and it enters @@ -228,7 +233,8 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason) * current clock offset. Processing of the result is done * within the event handling and hci_clock_offset_evt function. */ - if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER) { + if (conn->type == ACL_LINK && conn->role == HCI_ROLE_MASTER && + (conn->state == BT_CONNECTED || conn->state == BT_CONFIG)) { struct hci_dev *hdev = conn->hdev; struct hci_cp_read_clock_offset clkoff_cp; @@ -237,25 +243,7 @@ int hci_disconnect(struct hci_conn *conn, __u8 reason) &clkoff_cp); } - conn->state = BT_DISCONN; - - cp.handle = cpu_to_le16(conn->handle); - cp.reason = reason; - return hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, sizeof(cp), &cp); -} - -static void hci_amp_disconn(struct hci_conn *conn) -{ - struct hci_cp_disconn_phy_link cp; - - BT_DBG("hcon %p", conn); - - conn->state = BT_DISCONN; - - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = hci_proto_disconn_ind(conn); - hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, - sizeof(cp), &cp); + return hci_abort_conn(conn, reason); } static void hci_add_sco(struct hci_conn *conn, __u16 handle) @@ -421,35 +409,14 @@ static void hci_conn_timeout(struct work_struct *work) if (refcnt > 0) return; - switch (conn->state) { - case BT_CONNECT: - case BT_CONNECT2: - if (conn->out) { - if (conn->type == ACL_LINK) - hci_acl_create_connection_cancel(conn); - else if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - hci_connect_le_scan_remove(conn); - else - hci_le_create_connection_cancel(conn); - } - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - hci_reject_sco(conn); - } - break; - case BT_CONFIG: - case BT_CONNECTED: - if (conn->type == AMP_LINK) { - hci_amp_disconn(conn); - } else { - __u8 reason = hci_proto_disconn_ind(conn); - hci_disconnect(conn, reason); - } - break; - default: - conn->state = BT_CLOSED; - break; + /* LE connections in scanning state need special handling */ + if (conn->state == BT_CONNECT && conn->type == LE_LINK && + test_bit(HCI_CONN_SCANNING, &conn->flags)) { + hci_connect_le_scan_remove(conn); + return; } + + hci_abort_conn(conn, hci_proto_disconn_ind(conn)); } /* Enter sniff mode */ @@ -517,7 +484,7 @@ static void le_conn_timeout(struct work_struct *work) return; } - hci_le_create_connection_cancel(conn); + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); } struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, 
@@ -580,6 +547,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); + INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); @@ -835,7 +803,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, * attempt, we simply update pending_sec_level and auth_type fields * and return the object found. */ - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); conn_unfinished = NULL; if (conn) { if (conn->state == BT_CONNECT && @@ -985,13 +953,10 @@ static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) { struct hci_conn *conn; - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + conn = hci_conn_hash_lookup_le(hdev, addr, type); if (!conn) return false; - if (conn->dst_type != type) - return false; - if (conn->state != BT_CONNECTED) return false; @@ -1064,7 +1029,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, * attempt, we simply update pending_sec_level and auth_type fields * and return the object found. */ - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst); + conn = hci_conn_hash_lookup_le(hdev, dst, dst_type); if (conn) { if (conn->pending_sec_level < sec_level) conn->pending_sec_level = sec_level; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index e837539452fb..62edbf1b114e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -65,13 +65,6 @@ static DEFINE_IDA(hci_index_ida); #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) -/* ---- HCI notifications ---- */ - -static void hci_notify(struct hci_dev *hdev, int event) -{ - hci_sock_dev_event(hdev, event); -} - /* ---- HCI debugfs entries ---- */ static ssize_t dut_mode_read(struct file *file, char __user *user_buf, @@ -134,6 +127,77 @@ static const struct file_operations dut_mode_fops = { .llseek = default_llseek, }; +static ssize_t vendor_diag_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + int err; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + /* When the diagnostic flags are not persistent and the transport + * is not active, then there is no need for the vendor callback. + * + * Instead just store the desired value. If needed the setting + * will be programmed when the controller gets powered on. 
+ */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + !test_bit(HCI_RUNNING, &hdev->flags)) + goto done; + + hci_req_lock(hdev); + err = hdev->set_diag(hdev, enable); + hci_req_unlock(hdev); + + if (err < 0) + return err; + +done: + if (enable) + hci_dev_set_flag(hdev, HCI_VENDOR_DIAG); + else + hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG); + + return count; +} + +static const struct file_operations vendor_diag_fops = { + .open = simple_open, + .read = vendor_diag_read, + .write = vendor_diag_write, + .llseek = default_llseek, +}; + +static void hci_debugfs_create_basic(struct hci_dev *hdev) +{ + debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, + &dut_mode_fops); + + if (hdev->set_diag) + debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev, + &vendor_diag_fops); +} + /* ---- HCI requests ---- */ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode, @@ -444,12 +508,6 @@ static void le_setup(struct hci_request *req) /* Read LE Supported States */ hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); - /* Read LE White List Size */ - hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); - - /* Clear LE White List */ - hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL); - /* LE-only controllers have LE implicitly enabled */ if (!lmp_bredr_capable(hdev)) hci_dev_set_flag(hdev, HCI_LE_ENABLED); @@ -693,7 +751,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - if (hdev->commands[6] & 0x20) { + if (hdev->commands[6] & 0x20 && + !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) { struct hci_cp_read_stored_link_key cp; bacpy(&cp.bdaddr, BDADDR_ANY); @@ -767,6 +826,17 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); } + if (hdev->commands[26] & 0x40) { + /* Read LE White List Size */ + hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, + 0, NULL); + } + + if (hdev->commands[26] & 0x80) { + /* Clear LE White List */ + hci_req_add(req, HCI_OP_LE_CLEAR_WHITE_LIST, 0, NULL); + } + if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { /* Read LE Maximum Data Length */ hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL); @@ -849,13 +919,8 @@ static int __hci_init(struct hci_dev *hdev) if (err < 0) return err; - /* The Device Under Test (DUT) mode is special and available for - * all controller types. So just create it early on. 
- */ - if (hci_dev_test_flag(hdev, HCI_SETUP)) { - debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev, - &dut_mode_fops); - } + if (hci_dev_test_flag(hdev, HCI_SETUP)) + hci_debugfs_create_basic(hdev); err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); if (err < 0) @@ -932,6 +997,9 @@ static int __hci_unconf_init(struct hci_dev *hdev) if (err < 0) return err; + if (hci_dev_test_flag(hdev, HCI_SETUP)) + hci_debugfs_create_basic(hdev); + return 0; } @@ -1384,10 +1452,15 @@ static int hci_dev_do_open(struct hci_dev *hdev) goto done; } + set_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_OPEN); + atomic_set(&hdev->cmd_cnt, 1); set_bit(HCI_INIT, &hdev->flags); if (hci_dev_test_flag(hdev, HCI_SETUP)) { + hci_sock_dev_event(hdev, HCI_DEV_SETUP); + if (hdev->setup) ret = hdev->setup(hdev); @@ -1428,17 +1501,28 @@ static int hci_dev_do_open(struct hci_dev *hdev) if (!ret) { if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = __hci_init(hdev); + if (!ret && hdev->post_init) + ret = hdev->post_init(hdev); + } } + /* If the HCI Reset command is clearing all diagnostic settings, + * then they need to be reprogrammed after the init procedure + * completed. + */ + if (test_bit(HCI_QUIRK_NON_PERSISTENT_DIAG, &hdev->quirks) && + hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag) + ret = hdev->set_diag(hdev, true); + clear_bit(HCI_INIT, &hdev->flags); if (!ret) { hci_dev_hold(hdev); hci_dev_set_flag(hdev, HCI_RPA_EXPIRED); set_bit(HCI_UP, &hdev->flags); - hci_notify(hdev, HCI_DEV_UP); + hci_sock_dev_event(hdev, HCI_DEV_UP); if (!hci_dev_test_flag(hdev, HCI_SETUP) && !hci_dev_test_flag(hdev, HCI_CONFIG) && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && @@ -1465,6 +1549,9 @@ static int hci_dev_do_open(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + clear_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + hdev->close(hdev); hdev->flags &= BIT(HCI_RAW); } @@ -1548,8 +1635,10 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) BT_DBG("All LE pending actions cleared"); } -static int hci_dev_do_close(struct hci_dev *hdev) +int hci_dev_do_close(struct hci_dev *hdev) { + bool auto_off; + BT_DBG("%s %p", hdev->name, hdev); if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && @@ -1605,10 +1694,10 @@ static int hci_dev_do_close(struct hci_dev *hdev) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - if (!hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) { - if (hdev->dev_type == HCI_BREDR) - mgmt_powered(hdev, 0); - } + auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); + + if (!auto_off && hdev->dev_type == HCI_BREDR) + mgmt_powered(hdev, 0); hci_inquiry_cache_flush(hdev); hci_pend_le_actions_clear(hdev); @@ -1617,7 +1706,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) smp_unregister(hdev); - hci_notify(hdev, HCI_DEV_DOWN); + hci_sock_dev_event(hdev, HCI_DEV_DOWN); if (hdev->flush) hdev->flush(hdev); @@ -1625,9 +1714,8 @@ static int hci_dev_do_close(struct hci_dev *hdev) /* Reset device */ skb_queue_purge(&hdev->cmd_q); atomic_set(&hdev->cmd_cnt, 1); - if (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) && - !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) && - test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) { + if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) && + !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { set_bit(HCI_INIT, &hdev->flags); __hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); clear_bit(HCI_INIT, 
&hdev->flags); @@ -1648,6 +1736,9 @@ static int hci_dev_do_close(struct hci_dev *hdev) hdev->sent_cmd = NULL; } + clear_bit(HCI_RUNNING, &hdev->flags); + hci_sock_dev_event(hdev, HCI_DEV_CLOSE); + /* After this point our queues are empty * and no tasks are scheduled. */ hdev->close(hdev); @@ -2848,23 +2939,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, } /* This function requires the caller holds hdev->lock */ -struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev, - bdaddr_t *addr, - u8 addr_type) -{ - struct hci_conn_params *param; - - list_for_each_entry(param, &hdev->pend_le_conns, action) { - if (bacmp(¶m->addr, addr) == 0 && - param->addr_type == addr_type && - param->explicit_connect) - return param; - } - - return NULL; -} - -/* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type) { @@ -3338,7 +3412,7 @@ int hci_register_dev(struct hci_dev *hdev) if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); - hci_notify(hdev, HCI_DEV_REG); + hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); queue_work(hdev->req_workqueue, &hdev->power_on); @@ -3386,7 +3460,7 @@ void hci_unregister_dev(struct hci_dev *hdev) * pending list */ BUG_ON(!list_empty(&hdev->mgmt_pending)); - hci_notify(hdev, HCI_DEV_UNREG); + hci_sock_dev_event(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { rfkill_unregister(hdev->rfkill); @@ -3423,7 +3497,7 @@ EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) { - hci_notify(hdev, HCI_DEV_SUSPEND); + hci_sock_dev_event(hdev, HCI_DEV_SUSPEND); return 0; } EXPORT_SYMBOL(hci_suspend_dev); @@ -3431,7 +3505,7 @@ EXPORT_SYMBOL(hci_suspend_dev); /* Resume HCI device */ int hci_resume_dev(struct hci_dev *hdev) { - hci_notify(hdev, HCI_DEV_RESUME); + hci_sock_dev_event(hdev, HCI_DEV_RESUME); return 0; } EXPORT_SYMBOL(hci_resume_dev); @@ -3463,6 +3537,13 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) return -ENXIO; } + if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + kfree_skb(skb); + return -EINVAL; + } + /* Incoming skb */ bt_cb(skb)->incoming = 1; @@ -3476,6 +3557,22 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL(hci_recv_frame); +/* Receive diagnostic message from HCI drivers */ +int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb) +{ + /* Mark as diagnostic packet */ + bt_cb(skb)->pkt_type = HCI_DIAG_PKT; + + /* Time stamp */ + __net_timestamp(skb); + + skb_queue_tail(&hdev->rx_q, skb); + queue_work(hdev->workqueue, &hdev->rx_work); + + return 0; +} +EXPORT_SYMBOL(hci_recv_diag); + /* ---- Interface to upper protocols ---- */ int hci_register_cb(struct hci_cb *cb) @@ -3522,6 +3619,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); + if (!test_bit(HCI_RUNNING, &hdev->flags)) { + kfree_skb(skb); + return; + } + err = hdev->send(hdev, skb); if (err < 0) { BT_ERR("%s sending frame failed (%d)", hdev->name, err); @@ -3546,7 +3648,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, /* Stand-alone HCI commands must be flagged as * single-command requests. 
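A minimal sketch of a driver receive path feeding the new hci_recv_diag() entry point from the hunk above; the handler name and the is_diag classification are assumptions:

static void example_recv(struct hci_dev *hdev, struct sk_buff *skb,
			 bool is_diag)
{
	/* hci_recv_diag() stamps the skb as HCI_DIAG_PKT and queues it
	 * on the regular rx path, where hci_send_to_monitor() emits it
	 * as an HCI_MON_VENDOR_DIAG record
	 */
	if (is_diag) {
		hci_recv_diag(hdev, skb);
		return;
	}

	/* everything else must now be EVENT/ACL/SCO or it is dropped */
	hci_recv_frame(hdev, skb);
}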
*/ - bt_cb(skb)->req.start = true; + bt_cb(skb)->hci.req_start = true; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -3572,6 +3674,25 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE; } +/* Send HCI command and wait for command complete event */ +struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param, u32 timeout) +{ + struct sk_buff *skb; + + if (!test_bit(HCI_UP, &hdev->flags)) + return ERR_PTR(-ENETDOWN); + + bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen); + + hci_req_lock(hdev); + skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout); + hci_req_unlock(hdev); + + return skb; +} +EXPORT_SYMBOL(hci_cmd_sync); + /* Send ACL data */ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) { @@ -4224,7 +4345,7 @@ static bool hci_req_is_complete(struct hci_dev *hdev) if (!skb) return true; - return bt_cb(skb)->req.start; + return bt_cb(skb)->hci.req_start; } static void hci_resend_last(struct hci_dev *hdev) @@ -4284,26 +4405,26 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status, * callback would be found in hdev->sent_cmd instead of the * command queue (hdev->cmd_q). */ - if (bt_cb(hdev->sent_cmd)->req.complete) { - *req_complete = bt_cb(hdev->sent_cmd)->req.complete; + if (bt_cb(hdev->sent_cmd)->hci.req_complete) { + *req_complete = bt_cb(hdev->sent_cmd)->hci.req_complete; return; } - if (bt_cb(hdev->sent_cmd)->req.complete_skb) { - *req_complete_skb = bt_cb(hdev->sent_cmd)->req.complete_skb; + if (bt_cb(hdev->sent_cmd)->hci.req_complete_skb) { + *req_complete_skb = bt_cb(hdev->sent_cmd)->hci.req_complete_skb; return; } /* Remove all pending commands belonging to this request */ spin_lock_irqsave(&hdev->cmd_q.lock, flags); while ((skb = __skb_dequeue(&hdev->cmd_q))) { - if (bt_cb(skb)->req.start) { + if (bt_cb(skb)->hci.req_start) { __skb_queue_head(&hdev->cmd_q, skb); break; } - *req_complete = bt_cb(skb)->req.complete; - *req_complete_skb = bt_cb(skb)->req.complete_skb; + *req_complete = bt_cb(skb)->hci.req_complete; + *req_complete_skb = bt_cb(skb)->hci.req_complete_skb; kfree_skb(skb); } spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index bc31099d3b5b..d57c11c1c6b5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1915,7 +1915,8 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->peer_addr); + conn = hci_conn_hash_lookup_le(hdev, &cp->peer_addr, + cp->peer_addr_type); if (!conn) goto unlock; @@ -3137,7 +3138,7 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, * complete event). */ if (ev->status || - (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) + (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->hci.req_event)) hci_req_cmd_complete(hdev, *opcode, ev->status, req_complete, req_complete_skb); @@ -4724,6 +4725,27 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, struct hci_conn *conn; bool match; u32 flags; + u8 *ptr, real_len; + + /* Find the end of the data in case the report contains padded zero + * bytes at the end causing an invalid length value. + * + * When data is NULL, len is 0 so there is no need for extra ptr + * check as 'ptr < data + 0' is already false in such a case.
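The length-validation loop that continues just below treats advertising data as a sequence of [length | type | payload] elements terminated by padding. A standalone restatement of that walk, assuming only this standard AD framing:

#include <stddef.h>
#include <stdint.h>

static size_t ad_real_len(const uint8_t *data, size_t len)
{
	const uint8_t *ptr;

	/* each element is one length byte followed by that many bytes;
	 * a zero length byte can only be trailing padding
	 */
	for (ptr = data; ptr < data + len && *ptr; ptr += *ptr + 1) {
		if (ptr + 1 + *ptr > data + len)
			break;	/* element would overrun the report */
	}

	return (size_t)(ptr - data);
}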
+ */ + for (ptr = data; ptr < data + len && *ptr; ptr += *ptr + 1) { + if (ptr + 1 + *ptr > data + len) + break; + } + + real_len = ptr - data; + + /* Adjust for actual length */ + if (len != real_len) { + BT_ERR_RATELIMITED("%s advertising data length corrected", + hdev->name); + len = real_len; + } /* If the direct address is present, then this report is from * a LE Direct Advertising Report event. In that case it is @@ -5187,7 +5209,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb) u8 status = 0, event = hdr->evt, req_evt = 0; u16 opcode = HCI_OP_NOP; - if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { + if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->hci.req_event == event) { struct hci_command_hdr *cmd_hdr = (void *) hdev->sent_cmd->data; opcode = __le16_to_cpu(cmd_hdr->opcode); hci_req_cmd_complete(hdev, opcode, status, &req_complete, diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b7369220c9ef..981f8a202c27 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -56,8 +56,8 @@ static int req_run(struct hci_request *req, hci_req_complete_t complete, return -ENODATA; skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - bt_cb(skb)->req.complete_skb = complete_skb; + bt_cb(skb)->hci.req_complete = complete; + bt_cb(skb)->hci.req_complete_skb = complete_skb; spin_lock_irqsave(&hdev->cmd_q.lock, flags); skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); @@ -99,7 +99,7 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, BT_DBG("skb len %d", skb->len); bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; + bt_cb(skb)->hci.opcode = opcode; return skb; } @@ -128,9 +128,9 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, } if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; + bt_cb(skb)->hci.req_start = true; - bt_cb(skb)->req.event = event; + bt_cb(skb)->hci.req_event = event; skb_queue_tail(&req->cmd_q, skb); } @@ -564,3 +564,96 @@ void hci_update_background_scan(struct hci_dev *hdev) if (err && err != -ENODATA) BT_ERR("Failed to run HCI request: err %d", err); } + +void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, + u8 reason) +{ + switch (conn->state) { + case BT_CONNECTED: + case BT_CONFIG: + if (conn->type == AMP_LINK) { + struct hci_cp_disconn_phy_link cp; + + cp.phy_handle = HCI_PHY_HANDLE(conn->handle); + cp.reason = reason; + hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp), + &cp); + } else { + struct hci_cp_disconnect dc; + + dc.handle = cpu_to_le16(conn->handle); + dc.reason = reason; + hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc); + } + + conn->state = BT_DISCONN; + + break; + case BT_CONNECT: + if (conn->type == LE_LINK) { + if (test_bit(HCI_CONN_SCANNING, &conn->flags)) + break; + hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL, + 0, NULL); + } else if (conn->type == ACL_LINK) { + if (req->hdev->hci_ver < BLUETOOTH_VER_1_2) + break; + hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL, + 6, &conn->dst); + } + break; + case BT_CONNECT2: + if (conn->type == ACL_LINK) { + struct hci_cp_reject_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + rej.reason = reason; + + hci_req_add(req, HCI_OP_REJECT_CONN_REQ, + sizeof(rej), &rej); + } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + struct hci_cp_reject_sync_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + + /* SCO rejection has its own limited set of + * allowed error values (0x0D-0x0F) which isn't + * 
compatible with most values passed to this + * function. To be safe hard-code one of the + * values that's suitable for SCO. + */ + rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES; + + hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, + sizeof(rej), &rej); + } + break; + default: + conn->state = BT_CLOSED; + break; + } +} + +static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + if (status) + BT_DBG("Failed to abort connection: status 0x%2.2x", status); +} + +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + struct hci_request req; + int err; + + hci_req_init(&req, conn->hdev); + + __hci_abort_conn(&req, conn, reason); + + err = hci_req_run(&req, abort_conn_complete); + if (err && err != -ENODATA) { + BT_ERR("Failed to run HCI request: err %d", err); + return err; + } + + return 0; +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index bf6df92f42db..25c7f1305dcb 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -55,3 +55,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, void hci_update_background_scan(struct hci_dev *hdev); void __hci_update_background_scan(struct hci_request *req); + +int hci_abort_conn(struct hci_conn *conn, u8 reason); +void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, + u8 reason); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index f2d30d1156c9..b1eb8c09a660 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -120,10 +120,7 @@ static bool is_filtered_packet(struct sock *sk, struct sk_buff *skb) /* Apply filter */ flt = &hci_pi(sk)->filter; - if (bt_cb(skb)->pkt_type == HCI_VENDOR_PKT) - flt_type = 0; - else - flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; + flt_type = bt_cb(skb)->pkt_type & HCI_FLT_TYPE_BITS; if (!test_bit(flt_type, &flt->type_mask)) return true; @@ -173,6 +170,11 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; if (hci_pi(sk)->channel == HCI_CHANNEL_RAW) { + if (bt_cb(skb)->pkt_type != HCI_COMMAND_PKT && + bt_cb(skb)->pkt_type != HCI_EVENT_PKT && + bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) + continue; if (is_filtered_packet(sk, skb)) continue; } else if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { @@ -279,6 +281,9 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) else opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT); break; + case HCI_DIAG_PKT: + opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG); + break; default: return; } @@ -303,6 +308,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; struct hci_mon_new_index *ni; + struct hci_mon_index_info *ii; struct sk_buff *skb; __le16 opcode; @@ -312,7 +318,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) if (!skb) return NULL; - ni = (void *) skb_put(skb, HCI_MON_NEW_INDEX_SIZE); + ni = (void *)skb_put(skb, HCI_MON_NEW_INDEX_SIZE); ni->type = hdev->dev_type; ni->bus = hdev->bus; bacpy(&ni->bdaddr, &hdev->bdaddr); @@ -329,6 +335,40 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) opcode = cpu_to_le16(HCI_MON_DEL_INDEX); break; + case HCI_DEV_SETUP: + if (hdev->manufacturer == 0xffff) + return NULL; + + /* fall through */ + + case HCI_DEV_UP: + skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC); + if (!skb) + return NULL; + + ii = (void *)skb_put(skb, HCI_MON_INDEX_INFO_SIZE); + bacpy(&ii->bdaddr, &hdev->bdaddr); + ii->manufacturer = 
cpu_to_le16(hdev->manufacturer); + + opcode = cpu_to_le16(HCI_MON_INDEX_INFO); + break; + + case HCI_DEV_OPEN: + skb = bt_skb_alloc(0, GFP_ATOMIC); + if (!skb) + return NULL; + + opcode = cpu_to_le16(HCI_MON_OPEN_INDEX); + break; + + case HCI_DEV_CLOSE: + skb = bt_skb_alloc(0, GFP_ATOMIC); + if (!skb) + return NULL; + + opcode = cpu_to_le16(HCI_MON_CLOSE_INDEX); + break; + default: return NULL; } @@ -358,6 +398,28 @@ static void send_monitor_replay(struct sock *sk) if (sock_queue_rcv_skb(sk, skb)) kfree_skb(skb); + + if (!test_bit(HCI_RUNNING, &hdev->flags)) + continue; + + skb = create_monitor_event(hdev, HCI_DEV_OPEN); + if (!skb) + continue; + + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); + + if (test_bit(HCI_UP, &hdev->flags)) + skb = create_monitor_event(hdev, HCI_DEV_UP); + else if (hci_dev_test_flag(hdev, HCI_SETUP)) + skb = create_monitor_event(hdev, HCI_DEV_SETUP); + else + skb = NULL; + + if (skb) { + if (sock_queue_rcv_skb(sk, skb)) + kfree_skb(skb); + } } read_unlock(&hci_dev_list_lock); @@ -392,14 +454,12 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) void hci_sock_dev_event(struct hci_dev *hdev, int event) { - struct hci_ev_si_device ev; - BT_DBG("hdev %s event %d", hdev->name, event); - /* Send event to monitor */ if (atomic_read(&monitor_promisc)) { struct sk_buff *skb; + /* Send event to monitor */ skb = create_monitor_event(hdev, event); if (skb) { hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, @@ -408,10 +468,14 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) } } - /* Send event to sockets */ - ev.event = event; - ev.dev_id = hdev->id; - hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev); + if (event <= HCI_DEV_DOWN) { + struct hci_ev_si_device ev; + + /* Send event to sockets */ + ev.event = event; + ev.dev_id = hdev->id; + hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev); + } if (event == HCI_DEV_UNREG) { struct sock *sk; @@ -503,7 +567,16 @@ static int hci_sock_release(struct socket *sock) if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { - hci_dev_close(hdev->id); + /* When releasing a user channel's exclusive access, + * call hci_dev_do_close directly instead of calling + * hci_dev_close to ensure the exclusive access will + * be released and the controller brought back down. + * + * The checking of HCI_AUTO_OFF is not needed in this + * case since it will have been cleared already when + * opening the user channel. + */ + hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); mgmt_index_added(hdev); } @@ -928,7 +1001,7 @@ static int hci_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, BT_DBG("sock %p, sk %p", sock, sk); - if (flags & (MSG_OOB)) + if (flags & MSG_OOB) return -EOPNOTSUPP; if (sk->sk_state == BT_CLOSED) @@ -1176,7 +1249,7 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, /* Stand-alone HCI commands must be flagged as * single-command requests.
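To make the hci_sock_release() change above concrete, this is roughly how a user channel is obtained from user space; the constants are restated from the UAPI headers so the sketch stays self-contained, and closing the returned descriptor is what now funnels into hci_dev_do_close():

#include <errno.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

#define BTPROTO_HCI		1
#define HCI_CHANNEL_USER	1

struct sockaddr_hci {
	sa_family_t	hci_family;
	unsigned short	hci_dev;
	unsigned short	hci_channel;
};

static int open_user_channel(unsigned short index)
{
	struct sockaddr_hci addr;
	int fd;

	fd = socket(AF_BLUETOOTH, SOCK_RAW | SOCK_CLOEXEC, BTPROTO_HCI);
	if (fd < 0)
		return -errno;

	memset(&addr, 0, sizeof(addr));
	addr.hci_family = AF_BLUETOOTH;
	addr.hci_dev = index;
	addr.hci_channel = HCI_CHANNEL_USER;

	/* exclusive access: the controller is handed to us until close() */
	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		int err = -errno;

		close(fd);
		return err;
	}

	return fd;
}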
*/ - bt_cb(skb)->req.start = true; + bt_cb(skb)->hci.req_start = true; skb_queue_tail(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); @@ -1187,6 +1260,12 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto drop; } + if (bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT && + bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) { + err = -EINVAL; + goto drop; + } + skb_queue_tail(&hdev->raw_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index f1a117f8cad2..0bec4588c3c8 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; + /* The HIDP user-space API only contains calls to add and remove + * devices. There is no way to forward events of any kind. Therefore, + * we have to forcefully disconnect a device on idle-timeouts. This is + * unfortunate and weird API design, but it is spec-compliant and + * required for backwards-compatibility. Hence, on idle-timeout, we + * signal driver-detach events, so poll() will be woken up with an + * error-condition on both sockets. + */ + + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + wake_up_interruptible(sk_sleep(session->intr_sock->sk)); + wake_up_interruptible(sk_sleep(session->ctrl_sock->sk)); + hidp_session_terminate(session); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7c65ee200c29..66e8b6ee19a5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -239,7 +239,7 @@ static u16 l2cap_alloc_cid(struct l2cap_conn *conn) else dyn_end = L2CAP_CID_DYN_END; - for (cid = L2CAP_CID_DYN_START; cid < dyn_end; cid++) { + for (cid = L2CAP_CID_DYN_START; cid <= dyn_end; cid++) { if (!__l2cap_get_chan_by_scid(conn, cid)) return cid; } @@ -5250,7 +5250,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, credits = __le16_to_cpu(rsp->credits); result = __le16_to_cpu(rsp->result); - if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23)) + if (result == L2CAP_CR_SUCCESS && (mtu < 23 || mps < 23 || + dcid < L2CAP_CID_DYN_START || + dcid > L2CAP_CID_LE_DYN_END)) return -EPROTO; BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", @@ -5270,6 +5272,11 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, switch (result) { case L2CAP_CR_SUCCESS: + if (__l2cap_get_chan_by_dcid(conn, dcid)) { + err = -EBADSLT; + break; + } + chan->ident = 0; chan->dcid = dcid; chan->omtu = mtu; @@ -5437,9 +5444,16 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response_unlock; } + /* Check for valid dynamic CID range */ + if (scid < L2CAP_CID_DYN_START || scid > L2CAP_CID_LE_DYN_END) { + result = L2CAP_CR_INVALID_SCID; + chan = NULL; + goto response_unlock; + } + /* Check if we already have channel with that dcid */ if (__l2cap_get_chan_by_dcid(conn, scid)) { - result = L2CAP_CR_NO_MEM; + result = L2CAP_CR_SCID_IN_USE; chan = NULL; goto response_unlock; } diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 586b3d580cfc..1bb551527044 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1111,53 +1111,76 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) if (!sk) return 0; + lock_sock(sk); + + if (sk->sk_shutdown) + goto shutdown_already; + + BT_DBG("Handling sock shutdown"); + /* prevent sk structure from being freed whilst unlocked */ sock_hold(sk); chan 
= l2cap_pi(sk)->chan; /* prevent chan structure from being freed whilst unlocked */ l2cap_chan_hold(chan); - conn = chan->conn; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (chan->mode == L2CAP_MODE_ERTM && + chan->unacked_frames > 0 && + chan->state == BT_CONNECTED) { + err = __l2cap_wait_ack(sk, chan); + + /* After waiting for ACKs, check whether shutdown + * has already been actioned to close the L2CAP + * link such as by l2cap_disconnection_req(). + */ + if (sk->sk_shutdown) + goto has_shutdown; + } + + sk->sk_shutdown = SHUTDOWN_MASK; + release_sock(sk); + + l2cap_chan_lock(chan); + conn = chan->conn; + if (conn) + /* prevent conn structure from being freed */ + l2cap_conn_get(conn); + l2cap_chan_unlock(chan); + if (conn) + /* mutex lock must be taken before l2cap_chan_lock() */ mutex_lock(&conn->chan_lock); l2cap_chan_lock(chan); - lock_sock(sk); + l2cap_chan_close(chan, 0); + l2cap_chan_unlock(chan); - if (!sk->sk_shutdown) { - if (chan->mode == L2CAP_MODE_ERTM && - chan->unacked_frames > 0 && - chan->state == BT_CONNECTED) - err = __l2cap_wait_ack(sk, chan); + if (conn) { + mutex_unlock(&conn->chan_lock); + l2cap_conn_put(conn); + } - sk->sk_shutdown = SHUTDOWN_MASK; + lock_sock(sk); - release_sock(sk); - l2cap_chan_close(chan, 0); - lock_sock(sk); + if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && + !(current->flags & PF_EXITING)) + err = bt_sock_wait_state(sk, BT_CLOSED, + sk->sk_lingertime); - if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && - !(current->flags & PF_EXITING)) - err = bt_sock_wait_state(sk, BT_CLOSED, - sk->sk_lingertime); - } +has_shutdown: + l2cap_chan_put(chan); + sock_put(sk); +shutdown_already: if (!err && sk->sk_err) err = -sk->sk_err; release_sock(sk); - l2cap_chan_unlock(chan); - - if (conn) - mutex_unlock(&conn->chan_lock); - - l2cap_chan_put(chan); - sock_put(sk); - BT_DBG("err: %d", err); + BT_DBG("Sock shutdown complete err: %d", err); return err; } diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index b36bc0415854..aa4cf64e32a6 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -151,6 +151,22 @@ void bt_info(const char *format, ...) } EXPORT_SYMBOL(bt_info); +void bt_warn(const char *format, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + pr_warn("%pV", &vaf); + + va_end(args); +} +EXPORT_SYMBOL(bt_warn); + void bt_err(const char *format, ...) { struct va_format vaf; @@ -166,3 +182,19 @@ void bt_err(const char *format, ...) va_end(args); } EXPORT_SYMBOL(bt_err); + +void bt_err_ratelimited(const char *format, ...) 
+{ + struct va_format vaf; + va_list args; + + va_start(args, format); + + vaf.fmt = format; + vaf.va = &args; + + pr_err_ratelimited("%pV", &vaf); + + va_end(args); +} +EXPORT_SYMBOL(bt_err_ratelimited); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c4fe2fee753f..7f22119276f3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -268,6 +268,14 @@ static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, HCI_SOCK_TRUSTED, skip_sk); } +static u8 le_addr_type(u8 mgmt_addr_type) +{ + if (mgmt_addr_type == BDADDR_LE_PUBLIC) + return ADDR_LE_DEV_PUBLIC; + else + return ADDR_LE_DEV_RANDOM; +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -1631,35 +1639,8 @@ static int clean_up_hci_state(struct hci_dev *hdev) discov_stopped = hci_stop_discovery(&req); list_for_each_entry(conn, &hdev->conn_hash.list, list) { - struct hci_cp_disconnect dc; - struct hci_cp_reject_conn_req rej; - - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - dc.handle = cpu_to_le16(conn->handle); - dc.reason = 0x15; /* Terminated due to Power Off */ - hci_req_add(&req, HCI_OP_DISCONNECT, sizeof(dc), &dc); - break; - case BT_CONNECT: - if (conn->type == LE_LINK) - hci_req_add(&req, HCI_OP_LE_CREATE_CONN_CANCEL, - 0, NULL); - else if (conn->type == ACL_LINK) - hci_req_add(&req, HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - break; - case BT_CONNECT2: - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = 0x15; /* Terminated due to Power Off */ - if (conn->type == ACL_LINK) - hci_req_add(&req, HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - else if (conn->type == SCO_LINK) - hci_req_add(&req, HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); - break; - } + /* 0x15 == Terminated due to Power Off */ + __hci_abort_conn(&req, conn, 0x15); } err = hci_req_run(&req, clean_up_hci_complete); @@ -3044,9 +3025,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_cp_unpair_device *cp = data; struct mgmt_rp_unpair_device rp; - struct hci_cp_disconnect dc; + struct hci_conn_params *params; struct mgmt_pending_cmd *cmd; struct hci_conn *conn; + u8 addr_type; int err; memset(&rp, 0, sizeof(rp)); @@ -3087,36 +3069,23 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = NULL; err = hci_remove_link_key(hdev, &cp->addr.bdaddr); - } else { - u8 addr_type; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, - &cp->addr.bdaddr); - if (conn) { - /* Defer clearing up the connection parameters - * until closing to give a chance of keeping - * them if a repairing happens. - */ - set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); - - /* If disconnection is not requested, then - * clear the connection variable so that the - * link is not terminated. 
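In the clean_up_hci_state() hunk above, the raw 0x15 is the core-spec "Remote Device Terminated Connection due to Power Off" reason. Assuming hci.h carries its usual named constant, the call site reads better as:

#define HCI_ERROR_REMOTE_POWER_OFF	0x15	/* assumed from hci.h */

	/* one helper now covers every connection state on power-off */
	__hci_abort_conn(&req, conn, HCI_ERROR_REMOTE_POWER_OFF);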
- */ - if (!cp->disconnect) - conn = NULL; + if (err < 0) { + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_UNPAIR_DEVICE, + MGMT_STATUS_NOT_PAIRED, &rp, + sizeof(rp)); + goto unlock; } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + goto done; + } - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); + /* LE address type */ + addr_type = le_addr_type(cp->addr.type); - err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); - } + hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); + err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); if (err < 0) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNPAIR_DEVICE, MGMT_STATUS_NOT_PAIRED, &rp, @@ -3124,6 +3093,36 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, addr_type); + if (!conn) { + hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type); + goto done; + } + + /* Abort any ongoing SMP pairing */ + smp_cancel_pairing(conn); + + /* Defer clearing up the connection parameters until closing to + * give a chance of keeping them if a repairing happens. + */ + set_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags); + + /* Disable auto-connection parameters if present */ + params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr, addr_type); + if (params) { + if (params->explicit_connect) + params->auto_connect = HCI_AUTO_CONN_EXPLICIT; + else + params->auto_connect = HCI_AUTO_CONN_DISABLED; + } + + /* If disconnection is not requested, then clear the connection + * variable so that the link is not terminated. + */ + if (!cp->disconnect) + conn = NULL; + +done: /* If the connection variable is set, then termination of the * link is requested. */ @@ -3143,9 +3142,7 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = addr_cmd_complete; - dc.handle = cpu_to_le16(conn->handle); - dc.reason = 0x13; /* Remote User Terminated Connection */ - err = hci_send_cmd(hdev, HCI_OP_DISCONNECT, sizeof(dc), &dc); + err = hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); if (err < 0) mgmt_pending_remove(cmd); @@ -3193,7 +3190,8 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &cp->addr.bdaddr); else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, @@ -3544,16 +3542,9 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, conn = hci_connect_acl(hdev, &cp->addr.bdaddr, sec_level, auth_type); } else { - u8 addr_type; + u8 addr_type = le_addr_type(cp->addr.type); struct hci_conn_params *p; - /* Convert from L2CAP channel address type to HCI address type - */ - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - /* When pairing a new device, it is expected to remember * this device for future connections. 
Adding the connection * parameter information ahead of time allows tracking @@ -3697,7 +3688,8 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev, if (addr->type == BDADDR_BREDR) conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr); else - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr); + conn = hci_conn_hash_lookup_le(hdev, &addr->bdaddr, + le_addr_type(addr->type)); if (!conn) { err = mgmt_cmd_complete(sk, hdev->id, mgmt_op, @@ -5600,14 +5592,9 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; - u8 addr_type; - - if (irk->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - hci_add_irk(hdev, &irk->addr.bdaddr, addr_type, irk->val, + hci_add_irk(hdev, &irk->addr.bdaddr, + le_addr_type(irk->addr.type), irk->val, BDADDR_ANY); } @@ -5687,12 +5674,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; - u8 type, addr_type, authenticated; - - if (key->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + u8 type, authenticated; switch (key->type) { case MGMT_LTK_UNAUTHENTICATED: @@ -5718,9 +5700,9 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } - hci_add_ltk(hdev, &key->addr.bdaddr, addr_type, type, - authenticated, key->val, key->enc_size, key->ediv, - key->rand); + hci_add_ltk(hdev, &key->addr.bdaddr, + le_addr_type(key->addr.type), type, authenticated, + key->val, key->enc_size, key->ediv, key->rand); } err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, 0, @@ -6232,10 +6214,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, goto added; } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + addr_type = le_addr_type(cp->addr.type); if (cp->action == 0x02) auto_conn = HCI_AUTO_CONN_ALWAYS; @@ -6364,10 +6343,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto complete; } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; + addr_type = le_addr_type(cp->addr.type); /* Kernel internally uses conn_params with resolvable private * address, but Remove Device allows only identity addresses. @@ -7873,27 +7849,13 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) mgmt_event(MGMT_EV_NEW_LONG_TERM_KEY, hdev, &ev, sizeof(ev), NULL); } -void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk) +void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) { struct mgmt_ev_new_irk ev; memset(&ev, 0, sizeof(ev)); - /* For identity resolving keys from devices that are already - * using a public address or static random address, do not - * ask for storing this key. The identity resolving key really - * is only mandatory for devices using resolvable random - * addresses. - * - * Storing all identity resolving keys has the downside that - * they will be also loaded on next boot of they system. More - * identity resolving keys, means more time during scanning is - * needed to actually resolve these addresses. 
- */ - if (bacmp(&irk->rpa, BDADDR_ANY)) - ev.store_hint = 0x01; - else - ev.store_hint = 0x00; + ev.store_hint = persistent; bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index f315c8d0e43b..fe129663bd3f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -74,7 +74,7 @@ struct sco_pinfo { static void sco_sock_timeout(unsigned long arg) { - struct sock *sk = (struct sock *) arg; + struct sock *sk = (struct sock *)arg; BT_DBG("sock %p state %d", sk, sk->sk_state); @@ -170,18 +170,21 @@ static void sco_conn_del(struct hci_conn *hcon, int err) sco_conn_unlock(conn); if (sk) { + sock_hold(sk); bh_lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); bh_unlock_sock(sk); sco_sock_kill(sk); + sock_put(sk); } hcon->sco_data = NULL; kfree(conn); } -static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, struct sock *parent) +static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, + struct sock *parent) { BT_DBG("conn %p", conn); @@ -414,8 +417,10 @@ static void __sco_sock_close(struct sock *sk) if (sco_pi(sk)->conn->hcon) { sk->sk_state = BT_DISCONN; sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); + sco_conn_lock(sco_pi(sk)->conn); hci_conn_drop(sco_pi(sk)->conn->hcon); sco_pi(sk)->conn->hcon = NULL; + sco_conn_unlock(sco_pi(sk)->conn); } else sco_chan_del(sk, ECONNRESET); break; @@ -459,7 +464,8 @@ static struct proto sco_proto = { .obj_size = sizeof(struct sco_pinfo) }; -static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) +static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, + int proto, gfp_t prio, int kern) { struct sock *sk; @@ -508,7 +514,8 @@ static int sco_sock_create(struct net *net, struct socket *sock, int protocol, return 0; } -static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, + int addr_len) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; @@ -615,7 +622,8 @@ done: return err; } -static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flags) +static int sco_sock_accept(struct socket *sock, struct socket *newsock, + int flags) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; @@ -669,7 +677,8 @@ done: return err; } -static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, int *len, int peer) +static int sco_sock_getname(struct socket *sock, struct sockaddr *addr, + int *len, int peer) { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; @@ -779,7 +788,8 @@ static int sco_sock_recvmsg(struct socket *sock, struct msghdr *msg, return bt_sock_recvmsg(sock, msg, len, flags); } -static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) +static int sco_sock_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) { struct sock *sk = sock->sk; int len, err = 0; @@ -819,7 +829,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char voice.setting = sco_pi(sk)->setting; len = min_t(unsigned int, sizeof(voice), optlen); - if (copy_from_user((char *) &voice, optval, len)) { + if (copy_from_user((char *)&voice, optval, len)) { err = -EFAULT; break; } @@ -843,7 +853,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char 
return err; } -static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) +static int sco_sock_getsockopt_old(struct socket *sock, int optname, + char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct sco_options opts; @@ -903,7 +914,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user return err; } -static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +static int sco_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; int len, err = 0; @@ -928,7 +940,7 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char } if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags), - (u32 __user *) optval)) + (u32 __user *)optval)) err = -EFAULT; break; @@ -961,7 +973,9 @@ static int sco_sock_shutdown(struct socket *sock, int how) if (!sk) return 0; + sock_hold(sk); lock_sock(sk); + if (!sk->sk_shutdown) { sk->sk_shutdown = SHUTDOWN_MASK; sco_sock_clear_timer(sk); @@ -972,7 +986,10 @@ static int sco_sock_shutdown(struct socket *sock, int how) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } + release_sock(sk); + sock_put(sk); + return err; } @@ -1016,6 +1033,11 @@ static void sco_conn_ready(struct sco_conn *conn) } else { sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } + parent = sco_get_sock_listen(&conn->hcon->src); if (!parent) { sco_conn_unlock(conn); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 0510a577a7b5..c91353841e40 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -495,7 +495,7 @@ static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16], } /* The output of the random address function ah is: - * ah(h, r) = e(k, r') mod 2^24 + * ah(k, r) = e(k, r') mod 2^24 * The output of the security function e is then truncated to 24 bits * by taking the least significant 24 bits of the output of e as the * result of ah. @@ -811,7 +811,6 @@ static void smp_failure(struct l2cap_conn *conn, u8 reason) smp_send_cmd(conn, SMP_CMD_PAIRING_FAIL, sizeof(reason), &reason); - clear_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags); mgmt_auth_failed(hcon, HCI_ERROR_AUTH_FAILURE); if (chan->data) @@ -1046,8 +1045,24 @@ static void smp_notify_keys(struct l2cap_conn *conn) struct smp_cmd_pairing *rsp = (void *) &smp->prsp[1]; bool persistent; + if (hcon->type == ACL_LINK) { + if (hcon->key_type == HCI_LK_DEBUG_COMBINATION) + persistent = false; + else + persistent = !test_bit(HCI_CONN_FLUSH_KEY, + &hcon->flags); + } else { + /* The LTKs, IRKs and CSRKs should be persistent only if + * both sides had the bonding bit set in their + * authentication requests. + */ + persistent = !!((req->auth_req & rsp->auth_req) & + SMP_AUTH_BONDING); + } + if (smp->remote_irk) { - mgmt_new_irk(hdev, smp->remote_irk); + mgmt_new_irk(hdev, smp->remote_irk, persistent); + /* Now that user space can be considered to know the * identity address track the connection based on it * from now on (assuming this is an LE link). 
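The smp_notify_keys() hunk above reduces LE key storage to a single predicate. A standalone restatement, with the constant value copied from smp.h:

#include <stdbool.h>
#include <stdint.h>

#define SMP_AUTH_BONDING	0x01	/* from smp.h */

/* LE keys (LTK, IRK, CSRK) are persistent only when both sides set
 * the bonding bit in their pairing requests
 */
static bool le_keys_persistent(uint8_t local_auth, uint8_t remote_auth)
{
	return ((local_auth & remote_auth) & SMP_AUTH_BONDING) != 0;
}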
@@ -1075,21 +1090,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) } } - if (hcon->type == ACL_LINK) { - if (hcon->key_type == HCI_LK_DEBUG_COMBINATION) - persistent = false; - else - persistent = !test_bit(HCI_CONN_FLUSH_KEY, - &hcon->flags); - } else { - /* The LTKs and CSRKs should be persistent only if both sides - * had the bonding bit set in their authentication requests. - */ - persistent = !!((req->auth_req & rsp->auth_req) & - SMP_AUTH_BONDING); - } - - if (smp->csrk) { smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); @@ -2380,6 +2380,32 @@ unlock: return ret; } +void smp_cancel_pairing(struct hci_conn *hcon) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + struct l2cap_chan *chan; + struct smp_chan *smp; + + if (!conn) + return; + + chan = conn->smp; + if (!chan) + return; + + l2cap_chan_lock(chan); + + smp = chan->data; + if (smp) { + if (test_bit(SMP_FLAG_COMPLETE, &smp->flags)) + smp_failure(conn, 0); + else + smp_failure(conn, SMP_UNSPECIFIED); + } + + l2cap_chan_unlock(chan); +} + static int smp_cmd_encrypt_info(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_encrypt_info *rp = (void *) skb->data; diff --git a/net/bluetooth/smp.h b/net/bluetooth/smp.h index 6cf872563ea7..ffcc70b6b199 100644 --- a/net/bluetooth/smp.h +++ b/net/bluetooth/smp.h @@ -180,6 +180,7 @@ enum smp_key_pref { }; /* SMP Commands */ +void smp_cancel_pairing(struct hci_conn *hcon); bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, enum smp_key_pref key_pref); int smp_conn_security(struct hci_conn *hcon, __u8 sec_level); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 6ed2feb51e3c..5e88d3e17546 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -56,7 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); - if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid)) + if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; if (is_broadcast_ether_addr(dest)) @@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_max_age = br->max_age = 20 * HZ; br->bridge_hello_time = br->hello_time = 2 * HZ; br->bridge_forward_delay = br->forward_delay = 15 * HZ; - br->ageing_time = 300 * HZ; + br->ageing_time = BR_DEFAULT_AGEING_TIME; br_netfilter_rtable_init(br); br_stp_timer_init(br); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9e9875da0a4f..a642bb829d09 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -133,15 +133,16 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) { - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .u.fdb = { - .addr = f->addr.addr, - .vid = f->vlan_id, + struct switchdev_obj_port_fdb fdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_PORT_FDB, + .flags = SWITCHDEV_F_DEFER, }, + .vid = f->vlan_id, }; - switchdev_port_obj_del(f->dst->dev, &obj); + ether_addr_copy(fdb.addr, f->addr.addr); + switchdev_port_obj_del(f->dst->dev, &fdb.obj); } static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) @@ -163,22 +164,27 @@ static void fdb_delete_local(struct net_bridge *br, struct net_bridge_fdb_entry *f) { const unsigned char *addr = f->addr.addr; - u16 vid = f->vlan_id; + struct net_bridge_vlan_group *vg; + const struct net_bridge_vlan *v; struct net_bridge_port *op; + u16 vid = f->vlan_id; /* Maybe another port has same hw addr? 
*/ list_for_each_entry(op, &br->port_list, list) { + vg = nbp_vlan_group(op); if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && - (!vid || nbp_vlan_find(op, vid))) { + (!vid || br_vlan_find(vg, vid))) { f->dst = op; f->added_by_user = 0; return; } } + vg = br_vlan_group(br); + v = br_vlan_find(vg, vid); /* Maybe bridge device has same hw addr? */ if (p && ether_addr_equal(br->dev->dev_addr, addr) && - (!vid || br_vlan_find(br, vid))) { + (!vid || (v && br_vlan_should_use(v)))) { f->dst = NULL; f->added_by_user = 0; return; @@ -203,14 +209,14 @@ void br_fdb_find_delete_local(struct net_bridge *br, void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { + struct net_bridge_vlan_group *vg; struct net_bridge *br = p->br; - struct net_port_vlans *pv = nbp_get_vlan_info(p); - bool no_vlan = !pv; + struct net_bridge_vlan *v; int i; - u16 vid; spin_lock_bh(&br->hash_lock); + vg = nbp_vlan_group(p); /* Search all chains since old address/hash is unknown */ for (i = 0; i < BR_HASH_SIZE; i++) { struct hlist_node *h; @@ -226,7 +232,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) * configured, we can safely be done at * this point. */ - if (no_vlan) + if (!vg || !vg->num_vlans) goto insert; } } @@ -236,15 +242,15 @@ insert: /* insert new address, may fail if invalid address or dup. */ fdb_insert(br, p, newaddr, 0); - if (no_vlan) + if (!vg || !vg->num_vlans) goto done; /* Now add entries for every VLAN configured on the port. * This function runs under RTNL so the bitmap will not change * from under us. */ - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - fdb_insert(br, p, newaddr, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) + fdb_insert(br, p, newaddr, v->vid); done: spin_unlock_bh(&br->hash_lock); @@ -252,9 +258,9 @@ done: void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { + struct net_bridge_vlan_group *vg; struct net_bridge_fdb_entry *f; - struct net_port_vlans *pv; - u16 vid = 0; + struct net_bridge_vlan *v; spin_lock_bh(&br->hash_lock); @@ -264,20 +270,18 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, 0); - + vg = br_vlan_group(br); + if (!vg || !vg->num_vlans) + goto out; /* Now remove and add entries for every VLAN configured on the * bridge. This function runs under RTNL so the bitmap will not * change from under us. 
*/ - pv = br_get_vlan_info(br); - if (!pv) - goto out; - - for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { - f = __br_fdb_get(br, br->dev->dev_addr, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) { + f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); - fdb_insert(br, NULL, newaddr, vid); + fdb_insert(br, NULL, newaddr, v->vid); } out: spin_unlock_bh(&br->hash_lock); @@ -299,6 +303,8 @@ void br_fdb_cleanup(unsigned long _data) unsigned long this_timer; if (f->is_static) continue; + if (f->added_by_external_learn) + continue; this_timer = f->updated + delay; if (time_before_eq(this_timer, jiffies)) fdb_delete(br, f); @@ -489,7 +495,9 @@ static struct net_bridge_fdb_entry *fdb_find_rcu(struct hlist_head *head, static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, struct net_bridge_port *source, const unsigned char *addr, - __u16 vid) + __u16 vid, + unsigned char is_local, + unsigned char is_static) { struct net_bridge_fdb_entry *fdb; @@ -498,8 +506,8 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head, memcpy(fdb->addr.addr, addr, ETH_ALEN); fdb->dst = source; fdb->vlan_id = vid; - fdb->is_local = 0; - fdb->is_static = 0; + fdb->is_local = is_local; + fdb->is_static = is_static; fdb->added_by_user = 0; fdb->added_by_external_learn = 0; fdb->updated = fdb->used = jiffies; @@ -530,11 +538,10 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source, fdb_delete(br, fdb); } - fdb = fdb_create(head, source, addr, vid); + fdb = fdb_create(head, source, addr, vid, 1, 1); if (!fdb) return -ENOMEM; - fdb->is_local = fdb->is_static = 1; fdb_add_hw_addr(br, addr); fdb_notify(br, fdb, RTM_NEWNEIGH); return 0; @@ -591,7 +598,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } else { spin_lock(&br->hash_lock); if (likely(!fdb_find(head, addr, vid))) { - fdb = fdb_create(head, source, addr, vid); + fdb = fdb_create(head, source, addr, vid, 0, 0); if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; @@ -605,13 +612,14 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, } } -static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) +static int fdb_to_nud(const struct net_bridge *br, + const struct net_bridge_fdb_entry *fdb) { if (fdb->is_local) return NUD_PERMANENT; else if (fdb->is_static) return NUD_NOARP; - else if (has_expired(fdb->dst->br, fdb)) + else if (has_expired(br, fdb)) return NUD_STALE; else return NUD_REACHABLE; @@ -637,7 +645,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0; ndm->ndm_type = 0; ndm->ndm_ifindex = fdb->dst ? 
fdb->dst->dev->ifindex : br->dev->ifindex; - ndm->ndm_state = fdb_to_nud(fdb); + ndm->ndm_state = fdb_to_nud(br, fdb); if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr)) goto nla_put_failure; @@ -767,7 +775,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, if (!(flags & NLM_F_CREATE)) return -ENOENT; - fdb = fdb_create(head, source, addr, vid); + fdb = fdb_create(head, source, addr, vid, 0, 0); if (!fdb) return -ENOMEM; @@ -782,7 +790,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, } } - if (fdb_to_nud(fdb) != state) { + if (fdb_to_nud(br, fdb) != state) { if (state & NUD_PERMANENT) { fdb->is_local = 1; if (!fdb->is_static) { @@ -842,9 +850,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags) { - struct net_bridge_port *p; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan *v; + struct net_bridge *br = NULL; int err = 0; - struct net_port_vlans *pv; if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); @@ -856,34 +866,51 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], return -EINVAL; } - p = br_port_get_rtnl(dev); - if (p == NULL) { - pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", - dev->name); - return -EINVAL; + if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + } else { + p = br_port_get_rtnl(dev); + if (!p) { + pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + vg = nbp_vlan_group(p); } - pv = nbp_get_vlan_info(p); if (vid) { - if (!pv || !test_bit(vid, pv->vlan_bitmap)) { - pr_info("bridge: RTM_NEWNEIGH with unconfigured " - "vlan %d on port %s\n", vid, dev->name); + v = br_vlan_find(vg, vid); + if (!v || !br_vlan_should_use(v)) { + pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d on %s\n", vid, dev->name); return -EINVAL; } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + if (dev->priv_flags & IFF_EBRIDGE) + err = br_fdb_insert(br, NULL, addr, vid); + else + err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); - if (err || !pv) + if (dev->priv_flags & IFF_EBRIDGE) + err = br_fdb_insert(br, NULL, addr, 0); + else + err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + if (err || !vg || !vg->num_vlans) goto out; /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every * vlan on this port. 
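The br_fdb_add() rework above means fdb entries can now target the bridge device itself; the device-type dispatch it performs can be condensed into a sketch like this (the helper name is hypothetical):

/* hypothetical condensation of the lookup br_fdb_add() now performs */
static struct net_bridge_vlan_group *fdb_vlan_group(struct net_device *dev,
						    struct net_bridge_port **p)
{
	if (dev->priv_flags & IFF_EBRIDGE) {
		*p = NULL;	/* entry belongs to the bridge itself */
		return br_vlan_group(netdev_priv(dev));
	}

	*p = br_port_get_rtnl(dev);
	return *p ? nbp_vlan_group(*p) : NULL;
}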
*/ - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + if (dev->priv_flags & IFF_EBRIDGE) + err = br_fdb_insert(br, NULL, addr, v->vid); + else + err = __br_fdb_add(ndm, p, addr, nlh_flags, + v->vid); if (err) goto out; } @@ -893,6 +920,32 @@ out: return err; } +static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, + u16 vid) +{ + struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; + struct net_bridge_fdb_entry *fdb; + + fdb = fdb_find(head, addr, vid); + if (!fdb) + return -ENOENT; + + fdb_delete(br, fdb); + return 0; +} + +static int __br_fdb_delete_by_addr(struct net_bridge *br, + const unsigned char *addr, u16 vid) +{ + int err; + + spin_lock_bh(&br->hash_lock); + err = fdb_delete_by_addr(br, addr, vid); + spin_unlock_bh(&br->hash_lock); + + return err; +} + static int fdb_delete_by_addr_and_port(struct net_bridge_port *p, const u8 *addr, u16 vlan) { @@ -925,38 +978,53 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan *v; + struct net_bridge *br = NULL; int err; - struct net_port_vlans *pv; - p = br_port_get_rtnl(dev); - if (p == NULL) { - pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", - dev->name); - return -EINVAL; + if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group(br); + } else { + p = br_port_get_rtnl(dev); + if (!p) { + pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", + dev->name); + return -EINVAL; + } + vg = nbp_vlan_group(p); } - pv = nbp_get_vlan_info(p); if (vid) { - if (!pv || !test_bit(vid, pv->vlan_bitmap)) { - pr_info("bridge: RTM_DELNEIGH with unconfigured " - "vlan %d on port %s\n", vid, dev->name); + v = br_vlan_find(vg, vid); + if (!v) { + pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name); return -EINVAL; } - err = __br_fdb_delete(p, addr, vid); + if (dev->priv_flags & IFF_EBRIDGE) + err = __br_fdb_delete_by_addr(br, addr, vid); + else + err = __br_fdb_delete(p, addr, vid); } else { err = -ENOENT; - err &= __br_fdb_delete(p, addr, 0); - if (!pv) + if (dev->priv_flags & IFF_EBRIDGE) + err = __br_fdb_delete_by_addr(br, addr, 0); + else + err &= __br_fdb_delete(p, addr, 0); + + if (!vg || !vg->num_vlans) goto out; - /* We have vlans configured on this port and user didn't - * specify a VLAN. To be nice, add/update entry for every - * vlan on this port. 
- */ - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - err &= __br_fdb_delete(p, addr, vid); + list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; + if (dev->priv_flags & IFF_EBRIDGE) + err = __br_fdb_delete_by_addr(br, addr, v->vid); + else + err &= __br_fdb_delete(p, addr, v->vid); } } out: @@ -1032,7 +1100,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, head = &br->hash[br_mac_hash(addr, vid)]; fdb = fdb_find(head, addr, vid); if (!fdb) { - fdb = fdb_create(head, p, addr, vid); + fdb = fdb_create(head, p, addr, vid, 0, 0); if (!fdb) { err = -ENOMEM; goto err_unlock; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index fa7bfced888e..fcdb86dd5a23 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -30,12 +30,14 @@ static int deliver_clone(const struct net_bridge_port *prev, static inline int should_deliver(const struct net_bridge_port *p, const struct sk_buff *skb) { + struct net_bridge_vlan_group *vg; + + vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) && - p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; } -int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { if (!is_skb_forwardable(skb->dev, skb)) goto drop; @@ -65,10 +67,10 @@ drop: } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); -int br_forward_finish(struct sock *sk, struct sk_buff *skb) +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, - NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, + net, sk, skb, NULL, skb->dev, br_dev_queue_push_xmit); } @@ -76,7 +78,10 @@ EXPORT_SYMBOL_GPL(br_forward_finish); static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) { - skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + struct net_bridge_vlan_group *vg; + + vg = nbp_vlan_group_rcu(to); + skb = br_handle_vlan(to->br, vg, skb); if (!skb) return; @@ -92,13 +97,14 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) return; } - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(skb->dev), NULL, skb, NULL, skb->dev, + br_forward_finish); } static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) { + struct net_bridge_vlan_group *vg; struct net_device *indev; if (skb_warn_if_lro(skb)) { @@ -106,7 +112,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) return; } - skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb); + vg = nbp_vlan_group_rcu(to); + skb = br_handle_vlan(to->br, vg, skb); if (!skb) return; @@ -114,8 +121,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, - indev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, + dev_net(indev), NULL, skb, indev, skb->dev, br_forward_finish); } @@ -134,7 +141,7 @@ EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) { - if (should_deliver(to, skb)) { + if (to && should_deliver(to, skb)) { if (skb0) deliver_clone(to, skb,
__br_forward); else diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 45e4757c6fd2..ec02f5869a78 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -24,6 +24,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <linux/if_vlan.h> +#include <net/switchdev.h> #include "br_private.h" @@ -250,6 +251,8 @@ static void del_nbp(struct net_bridge_port *p) nbp_vlan_flush(p); br_fdb_delete_by_port(br, p, 0, 1); + switchdev_deferred_process(); + nbp_update_port_count(br); netdev_upper_dev_unlink(dev, br->dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index f921a5dce22d..f7fba74108a9 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -26,38 +26,44 @@ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; EXPORT_SYMBOL(br_should_route_hook); +static int +br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return netif_receive_skb(skb); +} + static int br_pass_frame_up(struct sk_buff *skb) { struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev; struct net_bridge *br = netdev_priv(brdev); + struct net_bridge_vlan_group *vg; struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); - struct net_port_vlans *pv; u64_stats_update_begin(&brstats->syncp); brstats->rx_packets++; brstats->rx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + vg = br_vlan_group_rcu(br); /* Bridge is just like any other port. Make sure the * packet is allowed except in promisc mode when someone * may be running packet capture. */ - pv = br_get_vlan_info(br); if (!(brdev->flags & IFF_PROMISC) && - !br_allowed_egress(br, pv, skb)) { + !br_allowed_egress(vg, skb)) { kfree_skb(skb); return NET_RX_DROP; } indev = skb->dev; skb->dev = brdev; - skb = br_handle_vlan(br, pv, skb); + skb = br_handle_vlan(br, vg, skb); if (!skb) return NET_RX_DROP; - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, - indev, NULL, - netif_receive_skb_sk); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(indev), NULL, skb, indev, NULL, + br_netif_receive_skb); } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, @@ -120,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } /* note: already called with rcu_read_lock */ -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -134,7 +140,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) if (!p || p->state == BR_STATE_DISABLED) goto drop; - if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid)) + if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; /* insert into forwarding database after filtering to avoid spoofing */ @@ -208,7 +214,7 @@ drop: EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -278,8 +284,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } /* Deliver packet to local host only */ - if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, - skb->dev, NULL, br_handle_local_finish)) { + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, +
br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; @@ -303,8 +310,8 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 8d423bc649b9..263b4de4de57 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; - br->ageing_time = clock_t_to_jiffies(args[1]); - return 0; + return br_set_ageing_time(br, args[1]); case BRCTL_GET_PORT_INFO: { diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index d747275fad18..cd8deea2d074 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -464,11 +464,11 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - unsigned short vid = VLAN_N_VID; + struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; struct net_bridge_port *p; - struct net_port_vlans *pv; + struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -489,10 +489,10 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; - pv = nbp_get_vlan_info(p); - if (br_vlan_enabled(br) && pv && entry->vid == 0) { - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - entry->vid = vid; + vg = nbp_vlan_group(p); + if (br_vlan_enabled(br) && vg && entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + entry->vid = v->vid; err = __br_mdb_add(net, br, entry); if (err) break; @@ -566,11 +566,11 @@ unlock: static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - unsigned short vid = VLAN_N_VID; + struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; struct net_bridge_port *p; - struct net_port_vlans *pv; + struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -591,10 +591,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) if (!p || p->br != br || p->state == BR_STATE_DISABLED) return -EINVAL; - pv = nbp_get_vlan_info(p); - if (br_vlan_enabled(br) && pv && entry->vid == 0) { - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - entry->vid = vid; + vg = nbp_vlan_group(p); + if (br_vlan_enabled(br) && vg && entry->vid == 0) { + list_for_each_entry(v, &vg->vlan_list, vlist) { + entry->vid = v->vid; err = __br_mdb_del(br, entry); if (!err) __br_mdb_notify(dev, entry, RTM_DELMDB); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 480b3de1a0e3..03661d97463c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -829,8 +829,8 @@ static void __br_multicast_send_query(struct net_bridge *br, if (port) { skb->dev = port->dev; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(port->dev), NULL, skb, NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 0a6f095bb0c9..7ddbe7ec81d6 
100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -111,7 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) /* largest possible L2 header, see br_nf_dev_queue_xmit() */ #define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct brnf_frag_data { char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; u8 encap_size; @@ -121,7 +120,6 @@ struct brnf_frag_data { }; static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); -#endif static void nf_bridge_info_free(struct sk_buff *skb) { @@ -189,10 +187,9 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) * expected format */ -static int br_validate_ipv4(struct sk_buff *skb) +static int br_validate_ipv4(struct net *net, struct sk_buff *skb) { const struct iphdr *iph; - struct net_device *dev = skb->dev; u32 len; if (!pskb_may_pull(skb, sizeof(struct iphdr))) @@ -213,13 +210,13 @@ static int br_validate_ipv4(struct sk_buff *skb) len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -232,7 +229,7 @@ static int br_validate_ipv4(struct sk_buff *skb) return 0; inhdr_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); drop: return -1; } @@ -256,7 +253,7 @@ void nf_bridge_update_protocol(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) +int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb) { struct neighbour *neigh; struct dst_entry *dst; @@ -273,7 +270,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(sk, skb); + ret = br_handle_frame_finish(net, sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and @@ -342,7 +339,7 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb, * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. 
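 * (An editor's sketch of the DNAT test mentioned above follows.)
 */

/* Editor's illustration, not part of the patch: the "was the packet
 * DNATed" check described in the comment above reduces to comparing
 * the destination address that br_nf_pre_routing() saved against the
 * one found in the header after ip(6)tables ran; the ipv4_daddr field
 * name is assumed from this kernel era's struct nf_bridge_info.
 */
static bool ipv4_daddr_was_changed_sketch(const struct sk_buff *skb,
					  const struct nf_bridge_info *nf_bridge)
{
	return ip_hdr(skb)->daddr != nf_bridge->ipv4_daddr;
}

/* end of editor's sketch; the patched function follows.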
*/ -static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) +static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); @@ -371,7 +368,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) goto free_skb; - rt = ip_route_output(dev_net(dev), iph->daddr, 0, + rt = ip_route_output(net, iph->daddr, 0, RT_TOS(iph->tos), 0); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't @@ -393,7 +390,7 @@ bridged_dnat: nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, + net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -413,7 +410,7 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -464,7 +461,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. */ -static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_pre_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -486,7 +483,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, return NF_ACCEPT; nf_bridge_pull_encap_header_rcsum(skb); - return br_nf_pre_routing_ipv6(ops, skb, state); + return br_nf_pre_routing_ipv6(priv, skb, state); } if (!brnf_call_iptables && !br->nf_call_iptables) @@ -497,7 +494,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header_rcsum(skb); - if (br_validate_ipv4(skb)) + if (br_validate_ipv4(state->net, skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -511,7 +508,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, skb->protocol = htons(ETH_P_IP); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish); @@ -526,7 +523,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. 
*/ -static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, +static unsigned int br_nf_local_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -535,7 +532,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, } /* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) +static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; @@ -559,7 +556,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb, in, skb->dev, br_forward_finish, 1); return 0; } @@ -570,7 +567,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. */ -static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_ip(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -609,13 +606,13 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, } if (pf == NFPROTO_IPV4) { - if (br_validate_ipv4(skb)) + if (br_validate_ipv4(state->net, skb)) return NF_DROP; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; } if (pf == NFPROTO_IPV6) { - if (br_validate_ipv6(skb)) + if (br_validate_ipv6(state->net, skb)) return NF_DROP; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; } @@ -626,14 +623,14 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb, brnf_get_logical_dev(skb, state->in), parent, br_nf_forward_finish); return NF_STOLEN; } -static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, +static unsigned int br_nf_forward_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -661,14 +658,13 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_ACCEPT; } *d = state->in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb, state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) -static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct brnf_frag_data *data; int err; @@ -690,30 +686,26 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) __skb_push(skb, data->encap_size); nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); } -#endif -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +static int +br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) { unsigned int mtu = ip_skb_dst_mtu(skb); struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; if (unlikely(((iph->frag_off & 
htons(IP_DF)) && !skb->ignore_df) || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } - return ip_do_fragment(sk, skb, output); + return ip_do_fragment(net, sk, skb, output); } -#endif static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) { @@ -722,7 +714,7 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb) return 0; } -static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge; unsigned int mtu_reserved; @@ -731,19 +723,19 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) { nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); } nf_bridge = nf_bridge_info_get(skb); -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ - if (skb->protocol == htons(ETH_P_IP)) { + if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) && + skb->protocol == htons(ETH_P_IP)) { struct brnf_frag_data *data; - if (br_validate_ipv4(skb)) + if (br_validate_ipv4(net, skb)) goto drop; IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -760,15 +752,14 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit); + return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit); } -#endif -#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) - if (skb->protocol == htons(ETH_P_IPV6)) { + if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && + skb->protocol == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); struct brnf_frag_data *data; - if (br_validate_ipv6(skb)) + if (br_validate_ipv6(net, skb)) goto drop; IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; @@ -783,21 +774,20 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) data->size); if (v6ops) - return v6ops->fragment(sk, skb, br_nf_push_frag_xmit); + return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); kfree_skb(skb); return -EMSGSIZE; } -#endif nf_bridge_info_free(skb); - return br_dev_queue_push_xmit(sk, skb); + return br_dev_queue_push_xmit(net, sk, skb); drop: kfree_skb(skb); return 0; } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, +static unsigned int br_nf_post_routing(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -836,7 +826,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb, NULL, realoutdev, br_nf_dev_queue_xmit); @@ -846,7 +836,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. 
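 * (An editor's sketch of the hook body, whose prototype alone is
 * touched below, follows.)
 */

/* Editor's illustration, not taken from the patch: the 'sabotage' hook
 * is assumed to stop the second PF_INET(6)/PRE_ROUTING traversal for
 * packets that already carry bridge-netfilter state, roughly along
 * these lines; the in_prerouting flag and the NF_STOP verdict are
 * assumptions about this kernel era.
 */
static unsigned int ip_sabotage_in_sketch(void *priv, struct sk_buff *skb,
					  const struct nf_hook_state *state)
{
	if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
		return NF_STOP;	/* the bridge already ran these hooks */

	return NF_ACCEPT;
}

/* end of editor's sketch; the original text resumes.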
*/ -static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, +static unsigned int ip_sabotage_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -880,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge->physoutdev = NULL; - br_handle_frame_finish(NULL, skb); + br_handle_frame_finish(dev_net(skb->dev), NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) @@ -906,49 +896,42 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable); static struct nf_hook_ops br_nf_ops[] __read_mostly = { { .hook = br_nf_pre_routing, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_local_in, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_forward_ip, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF - 1, }, { .hook = br_nf_forward_arp, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_BRNF, }, { .hook = br_nf_post_routing, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_LAST, }, { .hook = ip_sabotage_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_FIRST, }, { .hook = ip_sabotage_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 77383bfe7ea3..d61f56efc8dc 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -100,10 +100,9 @@ bad: return -1; } -int br_validate_ipv6(struct sk_buff *skb) +int br_validate_ipv6(struct net *net, struct sk_buff *skb) { const struct ipv6hdr *hdr; - struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(skb->dev); u32 pkt_len; u8 ip6h_len = sizeof(struct ipv6hdr); @@ -123,12 +122,12 @@ int br_validate_ipv6(struct sk_buff *skb) if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + ip6h_len > skb->len) { - IP6_INC_STATS_BH(dev_net(dev), idev, + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) { - IP6_INC_STATS_BH(dev_net(dev), idev, + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -143,7 +142,7 @@ int br_validate_ipv6(struct sk_buff *skb) return 0; inhdr_error: - IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); drop: return -1; } @@ -161,7 +160,7 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, * for br_nf_pre_routing_finish(), same logic is used here but * equivalent IPv6 function ip6_route_input() called indirectly. 
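 * (An editor's sketch of the IPv6 counterpart of the DNAT test
 * follows.)
 */

/* Editor's illustration, not part of the patch: the IPv6 variant of
 * the daddr-was-changed test, mirroring the IPv4 sketch earlier; the
 * ipv6_daddr field is the one the hunks below save in
 * br_nf_pre_routing_ipv6().
 */
static bool ipv6_daddr_was_changed_sketch(const struct sk_buff *skb,
					  const struct nf_bridge_info *nf_bridge)
{
	return memcmp(&nf_bridge->ipv6_daddr, &ipv6_hdr(skb)->daddr,
		      sizeof(nf_bridge->ipv6_daddr)) != 0;
}

/* end of editor's sketch; the patched function follows.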
*/ -static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) +static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; @@ -189,7 +188,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - sk, skb, skb->dev, NULL, + net, sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -208,7 +207,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, br_handle_frame_finish, 1); @@ -218,13 +217,13 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) /* Replicate the checks that IPv6 does on packet reception and pass the packet * to ip6tables. */ -unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, +unsigned int br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nf_bridge_info *nf_bridge; - if (br_validate_ipv6(skb)) + if (br_validate_ipv6(state->net, skb)) return NF_DROP; nf_bridge_put(skb->nf_bridge); @@ -237,7 +236,7 @@ unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ea748c93a07f..40197ff8918a 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -21,36 +21,35 @@ #include "br_private.h" #include "br_private_stp.h" -static int br_get_num_vlan_infos(const struct net_port_vlans *pv, - u32 filter_mask) +static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, + u32 filter_mask) { - u16 vid_range_start = 0, vid_range_end = 0; - u16 vid_range_flags = 0; - u16 pvid, vid, flags; + struct net_bridge_vlan *v; + u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0; + u16 flags, pvid; int num_vlans = 0; - if (filter_mask & RTEXT_FILTER_BRVLAN) - return pv->num_vlans; - if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) return 0; - /* Count number of vlan info's - */ - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + pvid = br_get_pvid(vg); + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { flags = 0; - if (vid == pvid) + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v)) + continue; + if (v->vid == pvid) flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_range_start == 0) { goto initvars; - } else if ((vid - vid_range_end) == 1 && + } else if ((v->vid - vid_range_end) == 1 && flags == vid_range_flags) { - vid_range_end = vid; + vid_range_end = v->vid; continue; } else { if ((vid_range_end - vid_range_start) > 0) @@ -59,8 +58,8 @@ static int br_get_num_vlan_infos(const struct net_port_vlans *pv, num_vlans += 1; } initvars: - vid_range_start = vid; - vid_range_end = 
vid; + vid_range_start = v->vid; + vid_range_end = v->vid; vid_range_flags = flags; } @@ -74,28 +73,43 @@ initvars: return num_vlans; } +static int br_get_num_vlan_infos(struct net_bridge_vlan_group *vg, + u32 filter_mask) +{ + int num_vlans; + + if (!vg) + return 0; + + if (filter_mask & RTEXT_FILTER_BRVLAN) + return vg->num_vlans; + + rcu_read_lock(); + num_vlans = __get_num_vlan_infos(vg, filter_mask); + rcu_read_unlock(); + + return num_vlans; +} + static size_t br_get_link_af_size_filtered(const struct net_device *dev, u32 filter_mask) { - struct net_port_vlans *pv; + struct net_bridge_vlan_group *vg = NULL; + struct net_bridge_port *p; + struct net_bridge *br; int num_vlan_infos; rcu_read_lock(); - if (br_port_exists(dev)) - pv = nbp_get_vlan_info(br_port_get_rcu(dev)); - else if (dev->priv_flags & IFF_EBRIDGE) - pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); - else - pv = NULL; - if (pv) - num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask); - else - num_vlan_infos = 0; + if (br_port_exists(dev)) { + p = br_port_get_rcu(dev); + vg = nbp_vlan_group_rcu(p); + } else if (dev->priv_flags & IFF_EBRIDGE) { + br = netdev_priv(dev); + vg = br_vlan_group_rcu(br); + } + num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask); rcu_read_unlock(); - if (!num_vlan_infos) - return 0; - /* Each VLAN is returned in bridge_vlan_info along with flags */ return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); } @@ -113,6 +127,20 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_COST */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_ID */ + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_NO */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_TOPOLOGY_CHANGE_ACK */ + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_CONFIG_PENDING */ + + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */ + + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */ + + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ +#endif + 0; } @@ -134,6 +162,7 @@ static int br_port_fill_attrs(struct sk_buff *skb, const struct net_bridge_port *p) { u8 mode = !!(p->flags & BR_HAIRPIN_MODE); + u64 timerval; if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || @@ -146,9 +175,36 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, - !!(p->flags & BR_PROXYARP_WIFI))) + !!(p->flags & BR_PROXYARP_WIFI)) || + nla_put(skb, IFLA_BRPORT_ROOT_ID, sizeof(struct ifla_bridge_id), + &p->designated_root) || + nla_put(skb, IFLA_BRPORT_BRIDGE_ID, sizeof(struct ifla_bridge_id), + &p->designated_bridge) || + nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_PORT, p->designated_port) || + nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_COST, p->designated_cost) || + nla_put_u16(skb, IFLA_BRPORT_ID, p->port_id) || + nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) || + 
nla_put_u8(skb, IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, + p->topology_change_ack) || + nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending)) + return -EMSGSIZE; + + timerval = br_timer_value(&p->message_age_timer); + if (nla_put_u64(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval)) + return -EMSGSIZE; + timerval = br_timer_value(&p->forward_delay_timer); + if (nla_put_u64(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval)) + return -EMSGSIZE; + timerval = br_timer_value(&p->hold_timer); + if (nla_put_u64(skb, IFLA_BRPORT_HOLD_TIMER, timerval)) return -EMSGSIZE; +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER, + p->multicast_router)) + return -EMSGSIZE; +#endif + return 0; } @@ -185,31 +241,33 @@ nla_put_failure: } static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, - const struct net_port_vlans *pv) + struct net_bridge_vlan_group *vg) { - u16 vid_range_start = 0, vid_range_end = 0; - u16 vid_range_flags = 0; - u16 pvid, vid, flags; + struct net_bridge_vlan *v; + u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0; + u16 flags, pvid; int err = 0; /* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan * and mark vlan info with begin and end flags * if vlaninfo represents a range */ - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { + pvid = br_get_pvid(vg); + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { flags = 0; - if (vid == pvid) + if (!br_vlan_should_use(v)) + continue; + if (v->vid == pvid) flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (vid_range_start == 0) { goto initvars; - } else if ((vid - vid_range_end) == 1 && + } else if ((v->vid - vid_range_end) == 1 && flags == vid_range_flags) { - vid_range_end = vid; + vid_range_end = v->vid; continue; } else { err = br_fill_ifvlaninfo_range(skb, vid_range_start, @@ -220,8 +278,8 @@ static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb, } initvars: - vid_range_start = vid; - vid_range_end = vid; + vid_range_start = v->vid; + vid_range_end = v->vid; vid_range_flags = flags; } @@ -238,19 +296,23 @@ initvars: } static int br_fill_ifvlaninfo(struct sk_buff *skb, - const struct net_port_vlans *pv) + struct net_bridge_vlan_group *vg) { struct bridge_vlan_info vinfo; - u16 pvid, vid; + struct net_bridge_vlan *v; + u16 pvid; + + pvid = br_get_pvid(vg); + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; - pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - vinfo.vid = vid; + vinfo.vid = v->vid; vinfo.flags = 0; - if (vid == pvid) + if (v->vid == pvid) vinfo.flags |= BRIDGE_VLAN_INFO_PVID; - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED; if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO, @@ -269,11 +331,11 @@ nla_put_failure: * Contains port and master info as well as carrier and bridge state. */ static int br_fill_ifinfo(struct sk_buff *skb, - const struct net_bridge_port *port, + struct net_bridge_port *port, u32 pid, u32 seq, int event, unsigned int flags, u32 filter_mask, const struct net_device *dev) { - const struct net_bridge *br; + struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; u8 operstate = netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN; @@ -320,26 +382,31 @@ static int br_fill_ifinfo(struct sk_buff *skb, /* Check if the VID information is requested */ if ((filter_mask & RTEXT_FILTER_BRVLAN) || (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { - const struct net_port_vlans *pv; + struct net_bridge_vlan_group *vg; struct nlattr *af; int err; + /* RCU needed because of the VLAN locking rules (rcu || rtnl) */ + rcu_read_lock(); if (port) - pv = nbp_get_vlan_info(port); + vg = nbp_vlan_group_rcu(port); else - pv = br_get_vlan_info(br); + vg = br_vlan_group_rcu(br); - if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) + if (!vg || !vg->num_vlans) { + rcu_read_unlock(); goto done; - + } af = nla_nest_start(skb, IFLA_AF_SPEC); - if (!af) + if (!af) { + rcu_read_unlock(); goto nla_put_failure; - + } if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) - err = br_fill_ifvlaninfo_compressed(skb, pv); + err = br_fill_ifvlaninfo_compressed(skb, vg); else - err = br_fill_ifvlaninfo(skb, pv); + err = br_fill_ifvlaninfo(skb, vg); + rcu_read_unlock(); if (err) goto nla_put_failure; nla_nest_end(skb, af); @@ -413,14 +480,14 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, switch (cmd) { case RTM_SETLINK: if (p) { + /* if the MASTER flag is set this will act on the global + * per-VLAN entry as well + */ err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); } else { + vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; err = br_vlan_add(br, vinfo->vid, vinfo->flags); } break; @@ -462,6 +529,9 @@ static int br_afspec(struct net_bridge *br, if (vinfo_start) return -EINVAL; vinfo_start = vinfo; + /* don't allow range of pvids */ + if (vinfo_start->flags & BRIDGE_VLAN_INFO_PVID) + return -EINVAL; continue; } @@ -507,6 +577,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 }, [IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 }, + [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -578,6 +649,18 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) return err; } + if (tb[IFLA_BRPORT_FLUSH]) + br_fdb_delete_by_port(p->br, p, 0, 0); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) { + u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]); + + err = br_multicast_set_port_router(p, mcast_router); + if (err) + return err; + } +#endif br_port_flags_change(p, old_flags ^ p->flags); return 0; } @@ -744,6 +827,27 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { [IFLA_BR_PRIORITY] = { .type = NLA_U16 }, [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 }, [IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 }, + [IFLA_BR_GROUP_FWD_MASK] = { .type = NLA_U16 }, + [IFLA_BR_GROUP_ADDR] = { .type = NLA_BINARY, + .len = ETH_ALEN }, + [IFLA_BR_MCAST_ROUTER] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_SNOOPING] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_QUERY_USE_IFADDR] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_QUERIER] = { .type = NLA_U8 }, + [IFLA_BR_MCAST_HASH_ELASTICITY] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_HASH_MAX] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 }, + [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 }, + 
[IFLA_BR_MCAST_QUERIER_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_QUERY_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 }, + [IFLA_BR_NF_CALL_IPTABLES] = { .type = NLA_U8 }, + [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 }, + [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 }, + [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 }, }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -774,9 +878,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], } if (data[IFLA_BR_AGEING_TIME]) { - u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]); - - br->ageing_time = clock_t_to_jiffies(ageing_time); + err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME])); + if (err) + return err; } if (data[IFLA_BR_STP_STATE]) { @@ -807,6 +911,158 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BR_VLAN_DEFAULT_PVID]) { + __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]); + + err = __br_vlan_set_default_pvid(br, defpvid); + if (err) + return err; + } +#endif + + if (data[IFLA_BR_GROUP_FWD_MASK]) { + u16 fwd_mask = nla_get_u16(data[IFLA_BR_GROUP_FWD_MASK]); + + if (fwd_mask & BR_GROUPFWD_RESTRICTED) + return -EINVAL; + br->group_fwd_mask = fwd_mask; + } + + if (data[IFLA_BR_GROUP_ADDR]) { + u8 new_addr[ETH_ALEN]; + + if (nla_len(data[IFLA_BR_GROUP_ADDR]) != ETH_ALEN) + return -EINVAL; + memcpy(new_addr, nla_data(data[IFLA_BR_GROUP_ADDR]), ETH_ALEN); + if (!is_link_local_ether_addr(new_addr)) + return -EINVAL; + if (new_addr[5] == 1 || /* 802.3x Pause address */ + new_addr[5] == 2 || /* 802.3ad Slow protocols */ + new_addr[5] == 3) /* 802.1X PAE address */ + return -EINVAL; + spin_lock_bh(&br->lock); + memcpy(br->group_addr, new_addr, sizeof(br->group_addr)); + spin_unlock_bh(&br->lock); + br->group_addr_set = true; + br_recalculate_fwd_mask(br); + } + + if (data[IFLA_BR_FDB_FLUSH]) + br_fdb_flush(br); + +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (data[IFLA_BR_MCAST_ROUTER]) { + u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]); + + err = br_multicast_set_router(br, multicast_router); + if (err) + return err; + } + + if (data[IFLA_BR_MCAST_SNOOPING]) { + u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]); + + err = br_multicast_toggle(br, mcast_snooping); + if (err) + return err; + } + + if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) { + u8 val; + + val = nla_get_u8(data[IFLA_BR_MCAST_QUERY_USE_IFADDR]); + br->multicast_query_use_ifaddr = !!val; + } + + if (data[IFLA_BR_MCAST_QUERIER]) { + u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]); + + err = br_multicast_set_querier(br, mcast_querier); + if (err) + return err; + } + + if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) { + u32 val = nla_get_u32(data[IFLA_BR_MCAST_HASH_ELASTICITY]); + + br->hash_elasticity = val; + } + + if (data[IFLA_BR_MCAST_HASH_MAX]) { + u32 hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]); + + err = br_multicast_set_hash_max(br, hash_max); + if (err) + return err; + } + + if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) { + u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]); + + br->multicast_last_member_count = val; + } + + if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) { + u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]); + + br->multicast_startup_query_count = val; + } + + if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) { + u64 val = 
nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]); + + br->multicast_last_member_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]); + + br->multicast_membership_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_QUERIER_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]); + + br->multicast_querier_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_QUERY_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]); + + br->multicast_query_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]); + + br->multicast_query_response_interval = clock_t_to_jiffies(val); + } + + if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) { + u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]); + + br->multicast_startup_query_interval = clock_t_to_jiffies(val); + } +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (data[IFLA_BR_NF_CALL_IPTABLES]) { + u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IPTABLES]); + + br->nf_call_iptables = val ? true : false; + } + + if (data[IFLA_BR_NF_CALL_IP6TABLES]) { + u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IP6TABLES]); + + br->nf_call_ip6tables = val ? true : false; + } + + if (data[IFLA_BR_NF_CALL_ARPTABLES]) { + u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_ARPTABLES]); + + br->nf_call_arptables = val ? true : false; + } #endif return 0; @@ -823,6 +1079,40 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */ +#endif + nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */ + nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_BRIDGE_ID */ + nla_total_size(sizeof(u16)) + /* IFLA_BR_ROOT_PORT */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_ROOT_PATH_COST */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE_DETECTED */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_GC_TIMER */ + nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */ +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_STARTUP_QUERY_CNT */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */ + nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */ 
+#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */ + nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */ #endif 0; } @@ -837,6 +1127,20 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) u32 stp_enabled = br->stp_enabled; u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1]; u8 vlan_enabled = br_vlan_enabled(br); + u64 clockval; + + clockval = br_timer_value(&br->hello_timer); + if (nla_put_u64(skb, IFLA_BR_HELLO_TIMER, clockval)) + return -EMSGSIZE; + clockval = br_timer_value(&br->tcn_timer); + if (nla_put_u64(skb, IFLA_BR_TCN_TIMER, clockval)) + return -EMSGSIZE; + clockval = br_timer_value(&br->topology_change_timer); + if (nla_put_u64(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval)) + return -EMSGSIZE; + clockval = br_timer_value(&br->gc_timer); + if (nla_put_u64(skb, IFLA_BR_GC_TIMER, clockval)) + return -EMSGSIZE; if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) || nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) || @@ -844,38 +1148,76 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) || nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) || nla_put_u16(skb, IFLA_BR_PRIORITY, priority) || - nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled)) + nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled) || + nla_put_u16(skb, IFLA_BR_GROUP_FWD_MASK, br->group_fwd_mask) || + nla_put(skb, IFLA_BR_BRIDGE_ID, sizeof(struct ifla_bridge_id), + &br->bridge_id) || + nla_put(skb, IFLA_BR_ROOT_ID, sizeof(struct ifla_bridge_id), + &br->designated_root) || + nla_put_u16(skb, IFLA_BR_ROOT_PORT, br->root_port) || + nla_put_u32(skb, IFLA_BR_ROOT_PATH_COST, br->root_path_cost) || + nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) || + nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED, + br->topology_change_detected) || + nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_VLAN_FILTERING - if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto)) + if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) || + nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid)) + return -EMSGSIZE; +#endif +#ifdef CONFIG_BRIDGE_IGMP_SNOOPING + if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) || + nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, !br->multicast_disabled) || + nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR, + br->multicast_query_use_ifaddr) || + nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) || + nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY, + br->hash_elasticity) || + nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) || + nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT, + br->multicast_last_member_count) || + nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT, + br->multicast_startup_query_count)) + return -EMSGSIZE; + + clockval = jiffies_to_clock_t(br->multicast_last_member_interval); + if (nla_put_u64(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_membership_interval); + if (nla_put_u64(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_querier_interval); + if (nla_put_u64(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_query_interval); + if 
(nla_put_u64(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_query_response_interval); + if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval)) + return -EMSGSIZE; + clockval = jiffies_to_clock_t(br->multicast_startup_query_interval); + if (nla_put_u64(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval)) + return -EMSGSIZE; +#endif +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) + if (nla_put_u8(skb, IFLA_BR_NF_CALL_IPTABLES, + br->nf_call_iptables ? 1 : 0) || + nla_put_u8(skb, IFLA_BR_NF_CALL_IP6TABLES, + br->nf_call_ip6tables ? 1 : 0) || + nla_put_u8(skb, IFLA_BR_NF_CALL_ARPTABLES, + br->nf_call_arptables ? 1 : 0)) return -EMSGSIZE; #endif return 0; } -static size_t br_get_link_af_size(const struct net_device *dev) -{ - struct net_port_vlans *pv; - - if (br_port_exists(dev)) - pv = nbp_get_vlan_info(br_port_get_rtnl(dev)); - else if (dev->priv_flags & IFF_EBRIDGE) - pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); - else - return 0; - - if (!pv) - return 0; - - /* Each VLAN is returned in bridge_vlan_info along with flags */ - return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info)); -} static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, - .get_link_af_size = br_get_link_af_size, + .get_link_af_size = br_get_link_af_size_filtered, }; struct rtnl_link_ops br_link_ops __read_mostly = { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 213baf7aaa93..216018c76018 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -20,6 +20,7 @@ #include <net/route.h> #include <net/ip6_fib.h> #include <linux/if_vlan.h> +#include <linux/rhashtable.h> #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -28,7 +29,6 @@ #define BR_PORT_BITS 10 #define BR_MAX_PORTS (1<<BR_PORT_BITS) -#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID) #define BR_VERSION "2.3" @@ -77,17 +77,61 @@ struct bridge_mcast_querier { }; #endif -struct net_port_vlans { - u16 port_idx; - u16 pvid; +/** + * struct net_bridge_vlan - per-vlan entry + * + * @vnode: rhashtable member + * @vid: VLAN id + * @flags: bridge vlan flags + * @br: if MASTER flag set, this points to a bridge struct + * @port: if MASTER flag unset, this points to a port struct + * @refcnt: if MASTER flag set, this is bumped for each port referencing it + * @brvlan: if MASTER flag unset, this points to the global per-VLAN context + * for this VLAN entry + * @vlist: sorted list of VLAN entries + * @rcu: used for entry destruction + * + * This structure is shared between the global per-VLAN entries contained in + * the bridge rhashtable and the local per-port per-VLAN entries contained in + * the port's rhashtable. The union entries should be interpreted depending on + * the entry flags that are set. 
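 * (An editor's sketch of how the unions are meant to be read follows.)
 */

/* Editor's illustration, not part of the patch: both unions in the
 * structure documented above are discriminated by the MASTER flag,
 * e.g. when resolving the bridge that owns an arbitrary entry.
 * br_vlan_is_master() is introduced by this diff a little further
 * down; the helper name here is hypothetical.
 */
static struct net_bridge *vlan_owning_bridge_sketch(const struct net_bridge_vlan *v)
{
	/* master entries point at the bridge directly; per-port entries
	 * reach it through their global (brvlan) context
	 */
	return br_vlan_is_master(v) ? v->br : v->brvlan->br;
}

/* end of editor's sketch; the structure definition follows.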
+ */ +struct net_bridge_vlan { + struct rhash_head vnode; + u16 vid; + u16 flags; union { - struct net_bridge_port *port; - struct net_bridge *br; - } parent; + struct net_bridge *br; + struct net_bridge_port *port; + }; + union { + atomic_t refcnt; + struct net_bridge_vlan *brvlan; + }; + struct list_head vlist; + struct rcu_head rcu; - unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN]; - unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN]; +}; + +/** + * struct net_bridge_vlan_group + * + * @vlan_hash: VLAN entry rhashtable + * @vlan_list: sorted VLAN entry list + * @num_vlans: number of total VLAN entries + * @pvid: PVID VLAN id + * + * IMPORTANT: Be careful when checking if there're VLAN entries using list + * primitives because the bridge can have entries in its list which + * are just for global context but not for filtering, i.e. they have + * the master flag set but not the brentry flag. If you have to check + * if there're "real" entries in the bridge please test @num_vlans + */ +struct net_bridge_vlan_group { + struct rhashtable vlan_hash; + struct list_head vlan_list; u16 num_vlans; + u16 pvid; }; struct net_bridge_fdb_entry @@ -185,7 +229,7 @@ struct net_bridge_port struct netpoll *np; #endif #ifdef CONFIG_BRIDGE_VLAN_FILTERING - struct net_port_vlans __rcu *vlan_info; + struct net_bridge_vlan_group __rcu *vlgrp; #endif }; @@ -293,10 +337,10 @@ struct net_bridge struct kobject *ifobj; u32 auto_cnt; #ifdef CONFIG_BRIDGE_VLAN_FILTERING + struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; __be16 vlan_proto; u16 default_pvid; - struct net_port_vlans __rcu *vlan_info; #endif }; @@ -344,6 +388,31 @@ static inline int br_is_root_bridge(const struct net_bridge *br) return !memcmp(&br->bridge_id, &br->designated_root, 8); } +/* check if a VLAN entry is global */ +static inline bool br_vlan_is_master(const struct net_bridge_vlan *v) +{ + return v->flags & BRIDGE_VLAN_INFO_MASTER; +} + +/* check if a VLAN entry is used by the bridge */ +static inline bool br_vlan_is_brentry(const struct net_bridge_vlan *v) +{ + return v->flags & BRIDGE_VLAN_INFO_BRENTRY; +} + +/* check if we should use the vlan entry, returns false if it's only context */ +static inline bool br_vlan_should_use(const struct net_bridge_vlan *v) +{ + if (br_vlan_is_master(v)) { + if (br_vlan_is_brentry(v)) + return true; + else + return false; + } + + return true; +} + /* br_device.c */ void br_dev_setup(struct net_device *dev); void br_dev_delete(struct net_device *dev, struct list_head *list); @@ -413,10 +482,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); +int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -int br_forward_finish(struct sock *sk, struct sk_buff *skb); +int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2, bool unicast); @@ -434,7 +503,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); +int br_handle_frame_finish(struct net *net, struct 
sock *sk, struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) @@ -601,18 +670,19 @@ static inline void br_mdb_uninit(void) /* br_vlan.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING -bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, - struct sk_buff *skb, u16 *vid); -bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v, +bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, + u16 *vid); +bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *v, + struct net_bridge_vlan_group *vg, struct sk_buff *skb); int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); -bool br_vlan_find(struct net_bridge *br, u16 vid); +struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); void br_recalculate_fwd_mask(struct net_bridge *br); int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val); @@ -620,22 +690,35 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto); int br_vlan_set_proto(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); +int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); -bool nbp_vlan_find(struct net_bridge_port *port, u16 vid); int nbp_vlan_init(struct net_bridge_port *port); +int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask); + +static inline struct net_bridge_vlan_group *br_vlan_group( + const struct net_bridge *br) +{ + return rtnl_dereference(br->vlgrp); +} -static inline struct net_port_vlans *br_get_vlan_info( - const struct net_bridge *br) +static inline struct net_bridge_vlan_group *nbp_vlan_group( + const struct net_bridge_port *p) { - return rcu_dereference_rtnl(br->vlan_info); + return rtnl_dereference(p->vlgrp); } -static inline struct net_port_vlans *nbp_get_vlan_info( - const struct net_bridge_port *p) +static inline struct net_bridge_vlan_group *br_vlan_group_rcu( + const struct net_bridge *br) { - return rcu_dereference_rtnl(p->vlan_info); + return rcu_dereference(br->vlgrp); +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( + const struct net_bridge_port *p) +{ + return rcu_dereference(p->vlgrp); } /* Since bridge now depends on 8021Q module, but the time bridge sees the @@ -645,9 +728,9 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; - else { + } else { *vid = 0; err = -EINVAL; } @@ -655,13 +738,13 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) return err; } -static inline u16 br_get_pvid(const struct net_port_vlans *v) +static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { - if (!v) + if (!vg) return 0; smp_rmb(); - return v->pvid; + return 
vg->pvid; } static inline int br_vlan_enabled(struct net_bridge *br) @@ -669,16 +752,15 @@ static inline int br_vlan_enabled(struct net_bridge *br) return br->vlan_enabled; } #else -static inline bool br_allowed_ingress(struct net_bridge *br, - struct net_port_vlans *v, +static inline bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, u16 *vid) { return true; } -static inline bool br_allowed_egress(struct net_bridge *br, - const struct net_port_vlans *v, +static inline bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb) { return true; @@ -691,7 +773,7 @@ static inline bool br_should_learn(struct net_bridge_port *p, } static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *v, + struct net_bridge_vlan_group *vg, struct sk_buff *skb) { return skb; @@ -711,11 +793,6 @@ static inline void br_vlan_flush(struct net_bridge *br) { } -static inline bool br_vlan_find(struct net_bridge *br, u16 vid) -{ - return false; -} - static inline void br_recalculate_fwd_mask(struct net_bridge *br) { } @@ -739,22 +816,12 @@ static inline void nbp_vlan_flush(struct net_bridge_port *port) { } -static inline struct net_port_vlans *br_get_vlan_info( - const struct net_bridge *br) -{ - return NULL; -} -static inline struct net_port_vlans *nbp_get_vlan_info( - const struct net_bridge_port *p) +static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, + u16 vid) { return NULL; } -static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) -{ - return false; -} - static inline int nbp_vlan_init(struct net_bridge_port *port) { return 0; @@ -764,7 +831,8 @@ static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag) { return 0; } -static inline u16 br_get_pvid(const struct net_port_vlans *v) + +static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg) { return 0; } @@ -779,6 +847,37 @@ static inline int __br_vlan_filter_toggle(struct net_bridge *br, { return -EOPNOTSUPP; } + +static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p, + u32 filter_mask) +{ + return 0; +} + +static inline struct net_bridge_vlan_group *br_vlan_group( + const struct net_bridge *br) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group( + const struct net_bridge_port *p) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *br_vlan_group_rcu( + const struct net_bridge *br) +{ + return NULL; +} + +static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu( + const struct net_bridge_port *p) +{ + return NULL; +} + #endif struct nf_br_ops { @@ -808,6 +907,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t); int br_set_forward_delay(struct net_bridge *br, unsigned long x); int br_set_hello_time(struct net_bridge *br, unsigned long x); int br_set_max_age(struct net_bridge *br, unsigned long x); +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time); /* br_stp_if.c */ diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index ed74ffaa851f..f7e8dee64fc8 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -40,14 +40,15 @@ void br_log_state(const struct net_bridge_port *p) void br_set_state(struct net_bridge_port *p, unsigned int state) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_STP_STATE, + .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE, + .flags = SWITCHDEV_F_DEFER, .u.stp_state = state, }; int err; p->state = state; err = 
switchdev_port_attr_set(p->dev, &attr); - if (err && err != -EOPNOTSUPP) + if (err) br_warn(p->br, "error setting offload STP state on port %u(%s)\n", (unsigned int) p->port_no, p->dev->name); } @@ -566,6 +567,29 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +int br_set_ageing_time(struct net_bridge *br, u32 ageing_time) +{ + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP, + .u.ageing_time = ageing_time, + }; + unsigned long t = clock_t_to_jiffies(ageing_time); + int err; + + if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME) + return -ERANGE; + + err = switchdev_port_attr_set(br->dev, &attr); + if (err) + return err; + + br->ageing_time = t; + mod_timer(&br->gc_timer, jiffies); + + return 0; +} + void __br_set_forward_delay(struct net_bridge *br, unsigned long t) { br->bridge_forward_delay = t; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 534fc4cd263e..5881fbc114a9 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -30,6 +30,12 @@ #define LLC_RESERVE sizeof(struct llc_pdu_un) +static int br_send_bpdu_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + static void br_send_bpdu(struct net_bridge_port *p, const unsigned char *data, int length) { @@ -54,9 +60,9 @@ static void br_send_bpdu(struct net_bridge_port *p, skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, - NULL, skb->dev, - dev_queue_xmit_sk); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, + dev_net(p->dev), NULL, skb, NULL, skb->dev, + br_send_bpdu_finish); } static inline void br_set_ticks(unsigned char *dest, int j) diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 4ca449a16132..fa53d7a89f48 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -15,6 +15,7 @@ #include <linux/kmod.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> +#include <net/switchdev.h> #include "br_private.h" #include "br_private_stp.h" @@ -35,11 +36,22 @@ static inline port_id br_make_port_id(__u8 priority, __u16 port_no) /* called under bridge lock */ void br_init_port(struct net_bridge_port *p) { + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME, + .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP | SWITCHDEV_F_DEFER, + .u.ageing_time = p->br->ageing_time, + }; + int err; + p->port_id = br_make_port_id(p->priority, p->port_no); br_become_designated_port(p); br_set_state(p, BR_STATE_BLOCKING); p->topology_change_ack = 0; p->config_pending = 0; + + err = switchdev_port_attr_set(p->dev, &attr); + if (err) + netdev_err(p->dev, "failed to set HW ageing time\n"); } /* called under bridge lock */ diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 4c97fc50fb70..8365bd53c421 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -102,8 +102,15 @@ static ssize_t ageing_time_show(struct device *d, static int set_ageing_time(struct net_bridge *br, unsigned long val) { - br->ageing_time = clock_t_to_jiffies(val); - return 0; + int ret; + + if (!rtnl_trylock()) + return restart_syscall(); + + ret = br_set_ageing_time(br, val); + rtnl_unlock(); + + return ret; } static ssize_t ageing_time_store(struct device *d, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 5f5a02b49a99..1394da63614a 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -6,86 +6,205 @@ #include "br_private.h" -static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid) 
+static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) { - if (v->pvid == vid) + const struct net_bridge_vlan *vle = ptr; + u16 vid = *(u16 *)arg->key; + + return vle->vid != vid; +} + +static const struct rhashtable_params br_vlan_rht_params = { + .head_offset = offsetof(struct net_bridge_vlan, vnode), + .key_offset = offsetof(struct net_bridge_vlan, vid), + .key_len = sizeof(u16), + .nelem_hint = 3, + .locks_mul = 1, + .max_size = VLAN_N_VID, + .obj_cmpfn = br_vlan_cmp, + .automatic_shrinking = true, +}; + +static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid) +{ + return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); +} + +static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) +{ + if (vg->pvid == vid) return; smp_wmb(); - v->pvid = vid; + vg->pvid = vid; } -static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid) +static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) { - if (v->pvid != vid) + if (vg->pvid != vid) return; smp_wmb(); - v->pvid = 0; + vg->pvid = 0; } -static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags) +static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) { + struct net_bridge_vlan_group *vg; + + if (br_vlan_is_master(v)) + vg = br_vlan_group(v->br); + else + vg = nbp_vlan_group(v->port); + if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(v, vid); + __vlan_add_pvid(vg, v->vid); else - __vlan_delete_pvid(v, vid); + __vlan_delete_pvid(vg, v->vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) - set_bit(vid, v->untagged_bitmap); + v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; else - clear_bit(vid, v->untagged_bitmap); + v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, u16 vid, u16 flags) { - const struct net_device_ops *ops = dev->netdev_ops; + struct switchdev_obj_port_vlan v = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .flags = flags, + .vid_begin = vid, + .vid_end = vid, + }; int err; - /* If driver uses VLAN ndo ops, use 8021q to install vid - * on device, otherwise try switchdev ops to install vid. + /* Try switchdev op first. In case it is not supported, fallback to + * 8021q add. 
*/ + err = switchdev_port_obj_add(dev, &v.obj); + if (err == -EOPNOTSUPP) + return vlan_vid_add(dev, br->vlan_proto, vid); + return err; +} - if (ops->ndo_vlan_rx_add_vid) { - err = vlan_vid_add(dev, br->vlan_proto, vid); - } else { - struct switchdev_obj vlan_obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - .u.vlan = { - .flags = flags, - .vid_begin = vid, - .vid_end = vid, - }, - }; +static void __vlan_add_list(struct net_bridge_vlan *v) +{ + struct net_bridge_vlan_group *vg; + struct list_head *headp, *hpos; + struct net_bridge_vlan *vent; - err = switchdev_port_obj_add(dev, &vlan_obj); - if (err == -EOPNOTSUPP) - err = 0; + if (br_vlan_is_master(v)) + vg = br_vlan_group(v->br); + else + vg = nbp_vlan_group(v->port); + + headp = &vg->vlan_list; + list_for_each_prev(hpos, headp) { + vent = list_entry(hpos, struct net_bridge_vlan, vlist); + if (v->vid < vent->vid) + continue; + else + break; } + list_add_rcu(&v->vlist, hpos); +} - return err; +static void __vlan_del_list(struct net_bridge_vlan *v) +{ + list_del_rcu(&v->vlist); } -static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) +static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, + u16 vid) { - struct net_bridge_port *p = NULL; - struct net_bridge *br; - struct net_device *dev; + struct switchdev_obj_port_vlan v = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .vid_begin = vid, + .vid_end = vid, + }; int err; - if (test_bit(vid, v->vlan_bitmap)) { - __vlan_add_flags(v, vid, flags); + /* Try switchdev op first. In case it is not supported, fallback to + * 8021q del. + */ + err = switchdev_port_obj_del(dev, &v.obj); + if (err == -EOPNOTSUPP) { + vlan_vid_del(dev, br->vlan_proto, vid); return 0; } + return err; +} + +/* Returns a master vlan, if it didn't exist it gets created. In all cases a + * a reference is taken to the master vlan before returning. + */ +static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *masterv; + + vg = br_vlan_group(br); + masterv = br_vlan_find(vg, vid); + if (!masterv) { + /* missing global ctx, create it now */ + if (br_vlan_add(br, vid, 0)) + return NULL; + masterv = br_vlan_find(vg, vid); + if (WARN_ON(!masterv)) + return NULL; + } + atomic_inc(&masterv->refcnt); + + return masterv; +} + +static void br_vlan_put_master(struct net_bridge_vlan *masterv) +{ + struct net_bridge_vlan_group *vg; + + if (!br_vlan_is_master(masterv)) + return; - if (v->port_idx) { - p = v->parent.port; + vg = br_vlan_group(masterv->br); + if (atomic_dec_and_test(&masterv->refcnt)) { + rhashtable_remove_fast(&vg->vlan_hash, + &masterv->vnode, br_vlan_rht_params); + __vlan_del_list(masterv); + kfree_rcu(masterv, rcu); + } +} + +/* This is the shared VLAN add function which works for both ports and bridge + * devices. There are four possible calls to this function in terms of the + * vlan entry type: + * 1. vlan is being added on a port (no master flags, global entry exists) + * 2. vlan is being added on a bridge (both master and brentry flags) + * 3. vlan is being added on a port, but a global entry didn't exist which + * is being created right now (master flag set, brentry flag unset), the + * global entry is used for global per-vlan features, but not for filtering + * 4. 
same as 3 but with both master and brentry flags set so the entry + * will be used for filtering in both the port and the bridge + */ +static int __vlan_add(struct net_bridge_vlan *v, u16 flags) +{ + struct net_bridge_vlan *masterv = NULL; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_device *dev; + struct net_bridge *br; + int err; + + if (br_vlan_is_master(v)) { + br = v->br; + dev = br->dev; + vg = br_vlan_group(br); + } else { + p = v->port; br = p->br; dev = p->dev; - } else { - br = v->parent.br; - dev = br->dev; + vg = nbp_vlan_group(p); } if (p) { @@ -93,116 +212,140 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags) * This ensures tagged traffic enters the bridge when * promiscuous mode is disabled by br_manage_promisc(). */ - err = __vlan_vid_add(dev, br, vid, flags); + err = __vlan_vid_add(dev, br, v->vid, flags); if (err) - return err; - } + goto out; + + /* need to work on the master vlan too */ + if (flags & BRIDGE_VLAN_INFO_MASTER) { + err = br_vlan_add(br, v->vid, flags | + BRIDGE_VLAN_INFO_BRENTRY); + if (err) + goto out_filt; + } - err = br_fdb_insert(br, p, dev->dev_addr, vid); - if (err) { - br_err(br, "failed insert local address into bridge " - "forwarding table\n"); - goto out_filt; + masterv = br_vlan_get_master(br, v->vid); + if (!masterv) + goto out_filt; + v->brvlan = masterv; } - set_bit(vid, v->vlan_bitmap); - v->num_vlans++; - __vlan_add_flags(v, vid, flags); + /* Add the dev mac and count the vlan only if it's usable */ + if (br_vlan_should_use(v)) { + err = br_fdb_insert(br, p, dev->dev_addr, v->vid); + if (err) { + br_err(br, "failed insert local address into bridge forwarding table\n"); + goto out_filt; + } + vg->num_vlans++; + } - return 0; + err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode, + br_vlan_rht_params); + if (err) + goto out_fdb_insert; -out_filt: - if (p) - vlan_vid_del(dev, br->vlan_proto, vid); + __vlan_add_list(v); + __vlan_add_flags(v, flags); +out: return err; -} -static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br, - u16 vid) -{ - const struct net_device_ops *ops = dev->netdev_ops; - int err = 0; - - /* If driver uses VLAN ndo ops, use 8021q to delete vid - * on device, otherwise try switchdev ops to delete vid. 
- */ - - if (ops->ndo_vlan_rx_kill_vid) { - vlan_vid_del(dev, br->vlan_proto, vid); - } else { - struct switchdev_obj vlan_obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - .u.vlan = { - .vid_begin = vid, - .vid_end = vid, - }, - }; +out_fdb_insert: + if (br_vlan_should_use(v)) { + br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid); + vg->num_vlans--; + } - err = switchdev_port_obj_del(dev, &vlan_obj); - if (err == -EOPNOTSUPP) - err = 0; +out_filt: + if (p) { + __vlan_vid_del(dev, br, v->vid); + if (masterv) { + br_vlan_put_master(masterv); + v->brvlan = NULL; + } } - return err; + goto out; } -static int __vlan_del(struct net_port_vlans *v, u16 vid) +static int __vlan_del(struct net_bridge_vlan *v) { - if (!test_bit(vid, v->vlan_bitmap)) - return -EINVAL; - - __vlan_delete_pvid(v, vid); - clear_bit(vid, v->untagged_bitmap); + struct net_bridge_vlan *masterv = v; + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; + int err = 0; - if (v->port_idx) { - struct net_bridge_port *p = v->parent.port; - int err; + if (br_vlan_is_master(v)) { + vg = br_vlan_group(v->br); + } else { + p = v->port; + vg = nbp_vlan_group(v->port); + masterv = v->brvlan; + } - err = __vlan_vid_del(p->dev, p->br, vid); + __vlan_delete_pvid(vg, v->vid); + if (p) { + err = __vlan_vid_del(p->dev, p->br, v->vid); if (err) - return err; + goto out; } - clear_bit(vid, v->vlan_bitmap); - v->num_vlans--; - if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { - if (v->port_idx) - RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); - else - RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); + if (br_vlan_should_use(v)) { + v->flags &= ~BRIDGE_VLAN_INFO_BRENTRY; + vg->num_vlans--; + } + + if (masterv != v) { + rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, + br_vlan_rht_params); + __vlan_del_list(v); kfree_rcu(v, rcu); } - return 0; + + br_vlan_put_master(masterv); +out: + return err; } -static void __vlan_flush(struct net_port_vlans *v) +static void __vlan_group_free(struct net_bridge_vlan_group *vg) { - smp_wmb(); - v->pvid = 0; - bitmap_zero(v->vlan_bitmap, VLAN_N_VID); - if (v->port_idx) - RCU_INIT_POINTER(v->parent.port->vlan_info, NULL); - else - RCU_INIT_POINTER(v->parent.br->vlan_info, NULL); - kfree_rcu(v, rcu); + WARN_ON(!list_empty(&vg->vlan_list)); + rhashtable_destroy(&vg->vlan_hash); + kfree(vg); +} + +static void __vlan_flush(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vlan, *tmp; + + __vlan_delete_pvid(vg, vg->pvid); + list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) + __vlan_del(vlan); } struct sk_buff *br_handle_vlan(struct net_bridge *br, - const struct net_port_vlans *pv, + struct net_bridge_vlan_group *vg, struct sk_buff *skb) { + struct net_bridge_vlan *v; u16 vid; /* If this packet was not filtered at input, let it pass */ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) goto out; - /* Vlan filter table must be configured at this point. The + /* At this point, we know that the frame was filtered and contains + * a valid vlan id. If the vlan id has untagged flag set, + * send untagged; otherwise, send tagged. + */ + br_vlan_get_tag(skb, &vid); + v = br_vlan_find(vg, vid); + /* Vlan entry must be configured at this point. The * only exception is the bridge is set in promisc mode and the * packet is destined for the bridge device. In this case * pass the packet as is. 
*/ - if (!pv) { + if (!v || !br_vlan_should_use(v)) { if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) { goto out; } else { @@ -210,13 +353,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, return NULL; } } - - /* At this point, we know that the frame was filtered and contains - * a valid vlan id. If the vlan id is set in the untagged bitmap, - * send untagged; otherwise, send tagged. - */ - br_vlan_get_tag(skb, &vid); - if (test_bit(vid, pv->untagged_bitmap)) + if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) skb->vlan_tci = 0; out: @@ -224,29 +361,13 @@ out: } /* Called under RCU */ -bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, - struct sk_buff *skb, u16 *vid) +static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto, + struct sk_buff *skb, u16 *vid) { + const struct net_bridge_vlan *v; bool tagged; - __be16 proto; - - /* If VLAN filtering is disabled on the bridge, all packets are - * permitted. - */ - if (!br->vlan_enabled) { - BR_INPUT_SKB_CB(skb)->vlan_filtered = false; - return true; - } - - /* If there are no vlan in the permitted list, all packets are - * rejected. - */ - if (!v) - goto drop; BR_INPUT_SKB_CB(skb)->vlan_filtered = true; - proto = br->vlan_proto; - /* If vlan tx offload is disabled on bridge device and frame was * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. @@ -281,7 +402,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, } if (!*vid) { - u16 pvid = br_get_pvid(v); + u16 pvid = br_get_pvid(vg); /* Frame had a tag with VID 0 or did not have a tag. * See if pvid is set on this port. That tells us which @@ -309,29 +430,43 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, } /* Frame had a valid vlan tag. See if vlan is allowed */ - if (test_bit(*vid, v->vlan_bitmap)) + v = br_vlan_find(vg, *vid); + if (v && br_vlan_should_use(v)) return true; drop: kfree_skb(skb); return false; } +bool br_allowed_ingress(const struct net_bridge *br, + struct net_bridge_vlan_group *vg, struct sk_buff *skb, + u16 *vid) +{ + /* If VLAN filtering is disabled on the bridge, all packets are + * permitted. + */ + if (!br->vlan_enabled) { + BR_INPUT_SKB_CB(skb)->vlan_filtered = false; + return true; + } + + return __allowed_ingress(vg, br->vlan_proto, skb, vid); +} + /* Called under RCU. */ -bool br_allowed_egress(struct net_bridge *br, - const struct net_port_vlans *v, +bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb) { + const struct net_bridge_vlan *v; u16 vid; /* If this packet was not filtered at input, let it pass */ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered) return true; - if (!v) - return false; - br_vlan_get_tag(skb, &vid); - if (test_bit(vid, v->vlan_bitmap)) + v = br_vlan_find(vg, vid); + if (v && br_vlan_should_use(v)) return true; return false; @@ -340,29 +475,29 @@ bool br_allowed_egress(struct net_bridge *br, /* Called under RCU */ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) { + struct net_bridge_vlan_group *vg; struct net_bridge *br = p->br; - struct net_port_vlans *v; /* If filtering was disabled at input, let it pass. 
*/ if (!br->vlan_enabled) return true; - v = rcu_dereference(p->vlan_info); - if (!v) + vg = nbp_vlan_group_rcu(p); + if (!vg || !vg->num_vlans) return false; if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto) *vid = 0; if (!*vid) { - *vid = br_get_pvid(v); + *vid = br_get_pvid(vg); if (!*vid) return false; return true; } - if (test_bit(*vid, v->vlan_bitmap)) + if (br_vlan_find(vg, *vid)) return true; return false; @@ -373,31 +508,49 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) */ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) { - struct net_port_vlans *pv = NULL; - int err; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + int ret; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); - if (pv) - return __vlan_add(pv, vid, flags); + vg = br_vlan_group(br); + vlan = br_vlan_find(vg, vid); + if (vlan) { + if (!br_vlan_is_brentry(vlan)) { + /* Trying to change flags of non-existent bridge vlan */ + if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) + return -EINVAL; + /* It was only kept for port vlans, now make it real */ + ret = br_fdb_insert(br, NULL, br->dev->dev_addr, + vlan->vid); + if (ret) { + br_err(br, "failed insert local address into bridge forwarding table\n"); + return ret; + } + atomic_inc(&vlan->refcnt); + vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; + vg->num_vlans++; + } + __vlan_add_flags(vlan, flags); + return 0; + } - /* Create port vlan infomration - */ - pv = kzalloc(sizeof(*pv), GFP_KERNEL); - if (!pv) + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) return -ENOMEM; - pv->parent.br = br; - err = __vlan_add(pv, vid, flags); - if (err) - goto out; + vlan->vid = vid; + vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER; + vlan->flags &= ~BRIDGE_VLAN_INFO_PVID; + vlan->br = br; + if (flags & BRIDGE_VLAN_INFO_BRENTRY) + atomic_set(&vlan->refcnt, 1); + ret = __vlan_add(vlan, flags); + if (ret) + kfree(vlan); - rcu_assign_pointer(br->vlan_info, pv); - return 0; -out: - kfree(pv); - return err; + return ret; } /* Must be protected by RTNL. @@ -405,49 +558,41 @@ out: */ int br_vlan_delete(struct net_bridge *br, u16 vid) { - struct net_port_vlans *pv; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); - if (!pv) - return -EINVAL; + vg = br_vlan_group(br); + v = br_vlan_find(vg, vid); + if (!v || !br_vlan_is_brentry(v)) + return -ENOENT; br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); + br_fdb_delete_by_port(br, NULL, vid, 0); - __vlan_del(pv, vid); - return 0; + return __vlan_del(v); } void br_vlan_flush(struct net_bridge *br) { - struct net_port_vlans *pv; + struct net_bridge_vlan_group *vg; ASSERT_RTNL(); - pv = rtnl_dereference(br->vlan_info); - if (!pv) - return; - __vlan_flush(pv); + vg = br_vlan_group(br); + __vlan_flush(vg); + RCU_INIT_POINTER(br->vlgrp, NULL); + synchronize_rcu(); + __vlan_group_free(vg); } -bool br_vlan_find(struct net_bridge *br, u16 vid) +struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid) { - struct net_port_vlans *pv; - bool found = false; - - rcu_read_lock(); - pv = rcu_dereference(br->vlan_info); - - if (!pv) - goto out; - - if (test_bit(vid, pv->vlan_bitmap)) - found = true; + if (!vg) + return NULL; -out: - rcu_read_unlock(); - return found; + return br_vlan_lookup(&vg->vlan_hash, vid); } /* Must be protected by RTNL. 
*/ @@ -505,21 +650,18 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) { int err = 0; struct net_bridge_port *p; - struct net_port_vlans *pv; + struct net_bridge_vlan *vlan; + struct net_bridge_vlan_group *vg; __be16 oldproto; - u16 vid, errvid; if (br->vlan_proto == proto) return 0; /* Add VLANs for the new proto to the device filter. */ list_for_each_entry(p, &br->port_list, list) { - pv = rtnl_dereference(p->vlan_info); - if (!pv) - continue; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { - err = vlan_vid_add(p->dev, proto, vid); + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) { + err = vlan_vid_add(p->dev, proto, vlan->vid); if (err) goto err_filt; } @@ -533,28 +675,21 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto) /* Delete VLANs for the old proto from the device filter. */ list_for_each_entry(p, &br->port_list, list) { - pv = rtnl_dereference(p->vlan_info); - if (!pv) - continue; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(p->dev, oldproto, vid); + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) + vlan_vid_del(p->dev, oldproto, vlan->vid); } return 0; err_filt: - errvid = vid; - for_each_set_bit(vid, pv->vlan_bitmap, errvid) - vlan_vid_del(p->dev, proto, vid); + list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist) + vlan_vid_del(p->dev, proto, vlan->vid); list_for_each_entry_continue_reverse(p, &br->port_list, list) { - pv = rtnl_dereference(p->vlan_info); - if (!pv) - continue; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(p->dev, proto, vid); + vg = nbp_vlan_group(p); + list_for_each_entry(vlan, &vg->vlan_list, vlist) + vlan_vid_del(p->dev, proto, vlan->vid); } return err; @@ -576,9 +711,19 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val) return err; } -static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid) +static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid) { - return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap); + struct net_bridge_vlan *v; + + if (vid != vg->pvid) + return false; + + v = br_vlan_lookup(&vg->vlan_hash, vid); + if (v && br_vlan_should_use(v) && + (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)) + return true; + + return false; } static void br_vlan_disable_default_pvid(struct net_bridge *br) @@ -589,24 +734,31 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br) /* Disable default_pvid on all ports where it is still * configured. */ - if (vlan_default_pvid(br_get_vlan_info(br), pvid)) + if (vlan_default_pvid(br_vlan_group(br), pvid)) br_vlan_delete(br, pvid); list_for_each_entry(p, &br->port_list, list) { - if (vlan_default_pvid(nbp_get_vlan_info(p), pvid)) + if (vlan_default_pvid(nbp_vlan_group(p), pvid)) nbp_vlan_delete(p, pvid); } br->default_pvid = 0; } -static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) +int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) { + const struct net_bridge_vlan *pvent; + struct net_bridge_vlan_group *vg; struct net_bridge_port *p; u16 old_pvid; int err = 0; unsigned long *changed; + if (!pvid) { + br_vlan_disable_default_pvid(br); + return 0; + } + changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long), GFP_KERNEL); if (!changed) @@ -617,11 +769,14 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) /* Update default_pvid config only if we do not conflict with * user configuration. 
*/ - if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) && - !br_vlan_find(br, pvid)) { + vg = br_vlan_group(br); + pvent = br_vlan_find(vg, pvid); + if ((!old_pvid || vlan_default_pvid(vg, old_pvid)) && + (!pvent || !br_vlan_should_use(pvent))) { err = br_vlan_add(br, pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY); if (err) goto out; br_vlan_delete(br, old_pvid); @@ -632,9 +787,10 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) /* Update default_pvid config only if we do not conflict with * user configuration. */ + vg = nbp_vlan_group(p); if ((old_pvid && - !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) || - nbp_vlan_find(p, pvid)) + !vlan_default_pvid(vg, old_pvid)) || + br_vlan_find(vg, pvid)) continue; err = nbp_vlan_add(p, pvid, @@ -668,7 +824,8 @@ err_port: if (old_pvid) br_vlan_add(br, old_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY); br_vlan_delete(br, pvid); } goto out; @@ -694,12 +851,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val) err = -EPERM; goto unlock; } - - if (!pvid) - br_vlan_disable_default_pvid(br); - else - err = __br_vlan_set_default_pvid(br, pvid); - + err = __br_vlan_set_default_pvid(br, pvid); unlock: rtnl_unlock(); return err; @@ -707,10 +859,68 @@ unlock: int br_vlan_init(struct net_bridge *br) { + struct net_bridge_vlan_group *vg; + int ret = -ENOMEM; + + vg = kzalloc(sizeof(*vg), GFP_KERNEL); + if (!vg) + goto out; + ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); + if (ret) + goto err_rhtbl; + INIT_LIST_HEAD(&vg->vlan_list); br->vlan_proto = htons(ETH_P_8021Q); br->default_pvid = 1; - return br_vlan_add(br, 1, - BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED); + rcu_assign_pointer(br->vlgrp, vg); + ret = br_vlan_add(br, 1, + BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | + BRIDGE_VLAN_INFO_BRENTRY); + if (ret) + goto err_vlan_add; + +out: + return ret; + +err_vlan_add: + rhashtable_destroy(&vg->vlan_hash); +err_rhtbl: + kfree(vg); + + goto out; +} + +int nbp_vlan_init(struct net_bridge_port *p) +{ + struct net_bridge_vlan_group *vg; + int ret = -ENOMEM; + + vg = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL); + if (!vg) + goto out; + + ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); + if (ret) + goto err_rhtbl; + INIT_LIST_HEAD(&vg->vlan_list); + rcu_assign_pointer(p->vlgrp, vg); + if (p->br->default_pvid) { + ret = nbp_vlan_add(p, p->br->default_pvid, + BRIDGE_VLAN_INFO_PVID | + BRIDGE_VLAN_INFO_UNTAGGED); + if (ret) + goto err_vlan_add; + } +out: + return ret; + +err_vlan_add: + RCU_INIT_POINTER(p->vlgrp, NULL); + synchronize_rcu(); + rhashtable_destroy(&vg->vlan_hash); +err_rhtbl: + kfree(vg); + + goto out; } /* Must be protected by RTNL. 
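The VLAN rework above replaces the per-context bitmaps of net_port_vlans with a net_bridge_vlan_group hung off br->vlgrp and p->vlgrp: an rhashtable keyed by VID for lookup, plus an RCU list kept sorted by __vlan_add_list() for ordered dumps. A self-contained sketch of the lookup pattern, with illustrative names (the rhashtable calls and parameter fields are the real API, mirroring br_vlan_rht_params):

#include <linux/rhashtable.h>

struct example_vlan {
        struct rhash_head node;         /* found via head_offset */
        u16 vid;                        /* key, found via key_offset */
};

/* Compare callback convention: return 0 when the object matches
 * the lookup key. */
static int example_vlan_cmp(struct rhashtable_compare_arg *arg,
                            const void *ptr)
{
        const struct example_vlan *v = ptr;

        return v->vid != *(const u16 *)arg->key;
}

static const struct rhashtable_params example_vlan_params = {
        .head_offset = offsetof(struct example_vlan, node),
        .key_offset  = offsetof(struct example_vlan, vid),
        .key_len     = sizeof(u16),
        .obj_cmpfn   = example_vlan_cmp,
        .automatic_shrinking = true,
};

static struct example_vlan *example_vlan_find(struct rhashtable *tbl, u16 vid)
{
        return rhashtable_lookup_fast(tbl, &vid, example_vlan_params);
}

static int example_vlan_insert(struct rhashtable *tbl, struct example_vlan *v)
{
        /* Refuses duplicates with -EEXIST rather than inserting twice. */
        return rhashtable_lookup_insert_fast(tbl, &v->node,
                                             example_vlan_params);
}

Callers that used to test a bit in vlan_bitmap now look the entry up and check br_vlan_should_use(), which filters out master entries that exist only as global per-VLAN context (case 3 in the __vlan_add() comment).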
@@ -718,35 +928,28 @@ int br_vlan_init(struct net_bridge *br) */ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) { - struct net_port_vlans *pv = NULL; - int err; + struct net_bridge_vlan *vlan; + int ret; ASSERT_RTNL(); - pv = rtnl_dereference(port->vlan_info); - if (pv) - return __vlan_add(pv, vid, flags); - - /* Create port vlan infomration - */ - pv = kzalloc(sizeof(*pv), GFP_KERNEL); - if (!pv) { - err = -ENOMEM; - goto clean_up; + vlan = br_vlan_find(nbp_vlan_group(port), vid); + if (vlan) { + __vlan_add_flags(vlan, flags); + return 0; } - pv->port_idx = port->port_no; - pv->parent.port = port; - err = __vlan_add(pv, vid, flags); - if (err) - goto clean_up; + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; - rcu_assign_pointer(port->vlan_info, pv); - return 0; + vlan->vid = vid; + vlan->port = port; + ret = __vlan_add(vlan, flags); + if (ret) + kfree(vlan); -clean_up: - kfree(pv); - return err; + return ret; } /* Must be protected by RTNL. @@ -754,61 +957,28 @@ clean_up: */ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid) { - struct net_port_vlans *pv; + struct net_bridge_vlan *v; ASSERT_RTNL(); - pv = rtnl_dereference(port->vlan_info); - if (!pv) - return -EINVAL; - + v = br_vlan_find(nbp_vlan_group(port), vid); + if (!v) + return -ENOENT; br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid); br_fdb_delete_by_port(port->br, port, vid, 0); - return __vlan_del(pv, vid); + return __vlan_del(v); } void nbp_vlan_flush(struct net_bridge_port *port) { - struct net_port_vlans *pv; - u16 vid; + struct net_bridge_vlan_group *vg; ASSERT_RTNL(); - pv = rtnl_dereference(port->vlan_info); - if (!pv) - return; - - for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) - vlan_vid_del(port->dev, port->br->vlan_proto, vid); - - __vlan_flush(pv); -} - -bool nbp_vlan_find(struct net_bridge_port *port, u16 vid) -{ - struct net_port_vlans *pv; - bool found = false; - - rcu_read_lock(); - pv = rcu_dereference(port->vlan_info); - - if (!pv) - goto out; - - if (test_bit(vid, pv->vlan_bitmap)) - found = true; - -out: - rcu_read_unlock(); - return found; -} - -int nbp_vlan_init(struct net_bridge_port *p) -{ - return p->br->default_pvid ? - nbp_vlan_add(p, p->br->default_pvid, - BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED) : - 0; + vg = nbp_vlan_group(port); + __vlan_flush(vg); + RCU_INIT_POINTER(port->vlgrp, NULL); + synchronize_rcu(); + __vlan_group_free(vg); } diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 17f2e4bc2a29..0ad639a96142 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -180,7 +180,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = info->loglevel; diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 59ac7952010d..54816150608e 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,7 +24,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? 
par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index d2cdf5d6e98c..ec94c6f1ae88 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -50,10 +50,14 @@ static const struct ebt_table broute_table = { static int ebt_broute(struct sk_buff *skb) { + struct nf_hook_state state; int ret; - ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - dev_net(skb->dev)->xt.broute_table); + nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN, + NFPROTO_BRIDGE, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); + + ret = ebt_do_table(skb, &state, state.net->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 8a3f63b2e807..32eccd101f26 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -57,39 +57,34 @@ static const struct ebt_table frame_filter = { }; static unsigned int -ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->in)->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static unsigned int -ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->out)->xt.frame_filter); + return ebt_do_table(skb, state, state->net->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { { .hook = ebt_in_hook, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { .hook = ebt_in_hook, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { .hook = ebt_out_hook, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_FILTER_OTHER, diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index c5ef5b1ab678..ec55358f00c8 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -57,39 +57,34 @@ static struct ebt_table frame_nat = { }; static unsigned int -ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->in)->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static unsigned int -ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +ebt_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ebt_do_table(ops->hooknum, skb, state->in, state->out, - dev_net(state->out)->xt.frame_nat); + return ebt_do_table(skb, state, state->net->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { { .hook = ebt_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { .hook = ebt_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { .hook = 
ebt_nat_in, - .owner = THIS_MODULE, .pf = NFPROTO_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, .priority = NF_BR_PRI_NAT_DST_BRIDGED, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 48b6b01295de..f46ca417bf2d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -183,10 +183,11 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry) } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table) +unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table) { + unsigned int hook = state->hook; int i, nentries; struct ebt_entry *point; struct ebt_counter *counter_base, *cb_base; @@ -199,8 +200,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct xt_action_param acpar; acpar.family = NFPROTO_BRIDGE; - acpar.in = in; - acpar.out = out; + acpar.net = state->net; + acpar.in = state->in; + acpar.out = state->out; acpar.hotdrop = false; acpar.hooknum = hook; @@ -220,7 +222,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, skb, in, out)) + if (ebt_basic_match(point, skb, state->in, state->out)) goto letscontinue; if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a343e62442b1..62f6b1b19589 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -65,31 +65,29 @@ int nft_bridge_ip6hdr_validate(struct sk_buff *skb) EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, ops, skb, state); + nft_set_pktinfo_ipv4(pkt, skb, state); else - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); } static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { #if IS_ENABLED(CONFIG_IPV6) if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0) + nft_set_pktinfo_ipv6(pkt, skb, state) == 0) return; #endif - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); } static unsigned int -nft_do_chain_bridge(const struct nf_hook_ops *ops, +nft_do_chain_bridge(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -97,17 +95,17 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_bridge __read_mostly = { diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 858d848564ee..fdba3d9fbff3 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ 
b/net/bridge/netfilter/nft_reject_bridge.c @@ -261,7 +261,6 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; if (is_broadcast_ether_addr(dest) || @@ -273,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nft_reject_br_send_v4_unreach(pkt->skb, pkt->in, - pkt->ops->hooknum, + pkt->hook, nft_reject_icmp_code(priv->icmp_code)); break; } @@ -290,17 +289,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->ops->hooknum, + nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in, - pkt->ops->hooknum); + nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb, + pkt->in, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in, - pkt->ops->hooknum, + nft_reject_br_send_v6_unreach(pkt->net, pkt->skb, + pkt->in, pkt->hook, nft_reject_icmpv6_code(priv->icmp_code)); break; } diff --git a/net/can/bcm.c b/net/can/bcm.c index a1ba6875c2a2..6863310d6973 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -96,7 +96,7 @@ struct bcm_op { canid_t can_id; u32 flags; unsigned long frames_abs, frames_filtered; - struct timeval ival1, ival2; + struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; @@ -131,6 +131,11 @@ static inline struct bcm_sock *bcm_sk(const struct sock *sk) return (struct bcm_sock *)sk; } +static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) +{ + return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); +} + #define CFSIZ sizeof(struct can_frame) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) @@ -953,8 +958,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->count = msg_head->count; op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; - op->kt_ival1 = timeval_to_ktime(msg_head->ival1); - op->kt_ival2 = timeval_to_ktime(msg_head->ival2); + op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); + op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero values? */ if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64) @@ -1134,8 +1139,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, /* set timer value */ op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; - op->kt_ival1 = timeval_to_ktime(msg_head->ival1); - op->kt_ival2 = timeval_to_ktime(msg_head->ival2); + op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); + op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero value? 
*/ if (!op->kt_ival1.tv64) diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 6b4d3a1684de..bcbec33c6a14 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -322,7 +322,7 @@ static int get_secret(struct ceph_crypto_key *dst, const char *name) { goto out; } - ckey = ukey->payload.data; + ckey = ukey->payload.data[0]; err = ceph_crypto_key_clone(dst, ckey); if (err) goto out_key; diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 4440edcce0d6..42e8649c6e79 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -537,7 +537,7 @@ static int ceph_key_preparse(struct key_preparsed_payload *prep) if (ret < 0) goto err_ckey; - prep->payload[0] = ckey; + prep->payload.data[0] = ckey; prep->quotalen = datalen; return 0; @@ -549,14 +549,14 @@ err: static void ceph_key_free_preparse(struct key_preparsed_payload *prep) { - struct ceph_crypto_key *ckey = prep->payload[0]; + struct ceph_crypto_key *ckey = prep->payload.data[0]; ceph_crypto_key_destroy(ckey); kfree(ckey); } static void ceph_key_destroy(struct key *key) { - struct ceph_crypto_key *ckey = key->payload.data; + struct ceph_crypto_key *ckey = key->payload.data[0]; ceph_crypto_key_destroy(ckey); kfree(ckey); diff --git a/net/core/dev.c b/net/core/dev.c index c14748d051e7..ab9b8d0d115e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2942,9 +2942,11 @@ EXPORT_SYMBOL(xmit_recursion); /** * dev_loopback_xmit - loop back @skb + * @net: network namespace this loopback is happening in + * @sk: sk needed to be a netfilter okfn * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -2999,6 +3001,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) new_index = skb_tx_hash(dev, skb); if (queue_index != new_index && sk && + sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) sk_tx_queue_set(sk, new_index); @@ -3170,11 +3173,11 @@ out: return rc; } -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) +int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit_sk); +EXPORT_SYMBOL(dev_queue_xmit); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3695,6 +3698,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, case TC_ACT_QUEUED: kfree_skb(skb); return NULL; + case TC_ACT_REDIRECT: + /* skb_mac_header check was done by cls/act_bpf, so + * we can safely push the L2 header back before + * redirecting to another netdev + */ + __skb_push(skb, skb->mac_len); + skb_do_redirect(skb); + return NULL; default: break; } @@ -4009,13 +4020,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) +int netif_receive_skb(struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb_sk); +EXPORT_SYMBOL(netif_receive_skb); /* Network device is going away, flush any packets still pending * Called with irqs disabled. 
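The ebtables and nf_tables hunks above are one mechanical migration: hook functions now take (void *priv, struct sk_buff *skb, const struct nf_hook_state *state), nf_hook_ops loses its .owner field, and the namespace travels in state->net instead of being derived with the dev_net(in ? in : out) guesswork (either device can be NULL depending on the hook point); xt_action_param grows a matching .net. A minimal hook against the new prototype (the name is illustrative; ebt_do_table's three-argument form is the one introduced above):

static unsigned int example_br_hook(void *priv, struct sk_buff *skb,
                                    const struct nf_hook_state *state)
{
        /* state->net is valid at every hook point, unlike state->in
         * (NULL for locally generated traffic) or state->out (NULL on
         * the input-side hooks). */
        return ebt_do_table(skb, state, state->net->xt.frame_filter);
}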
@@ -4884,8 +4895,7 @@ struct netdev_adjacent { struct rcu_head rcu; }; -static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev, - struct net_device *adj_dev, +static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, struct list_head *adj_list) { struct netdev_adjacent *adj; @@ -4911,7 +4921,7 @@ bool netdev_has_upper_dev(struct net_device *dev, { ASSERT_RTNL(); - return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper); + return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -5173,7 +5183,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, struct netdev_adjacent *adj; int ret; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { adj->ref_nr++; @@ -5229,7 +5239,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, { struct netdev_adjacent *adj; - adj = __netdev_find_adj(dev, adj_dev, dev_list); + adj = __netdev_find_adj(adj_dev, dev_list); if (!adj) { pr_err("tried to remove device %s from %s\n", @@ -5350,10 +5360,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. */ - if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper)) + if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper)) return -EBUSY; - if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper)) + if (__netdev_find_adj(upper_dev, &dev->adj_list.upper)) return -EEXIST; if (master && netdev_master_upper_dev_get(dev)) @@ -5363,6 +5373,12 @@ static int __netdev_upper_dev_link(struct net_device *dev, changeupper_info.master = master; changeupper_info.linking = true; + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + ret = notifier_to_errno(ret); + if (ret) + return ret; + ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private, master); if (ret) @@ -5505,6 +5521,9 @@ void netdev_upper_dev_unlink(struct net_device *dev, changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; changeupper_info.linking = false; + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + &changeupper_info.info); + __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); /* Here is the tricky part. 
We must remove all dev's lower @@ -5631,7 +5650,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev, if (!lower_dev) return NULL; - lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower); + lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower); if (!lower) return NULL; @@ -6269,6 +6288,48 @@ static void rollback_registered(struct net_device *dev) list_del(&single); } +static netdev_features_t netdev_sync_upper_features(struct net_device *lower, + struct net_device *upper, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(upper->wanted_features & feature) + && (features & feature)) { + netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n", + &feature, upper->name); + features &= ~feature; + } + } + + return features; +} + +static void netdev_sync_lower_features(struct net_device *upper, + struct net_device *lower, netdev_features_t features) +{ + netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES; + netdev_features_t feature; + int feature_bit; + + for_each_netdev_feature(&upper_disables, feature_bit) { + feature = __NETIF_F_BIT(feature_bit); + if (!(features & feature) && (lower->features & feature)) { + netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", + &feature, lower->name); + lower->wanted_features &= ~feature; + netdev_update_features(lower); + + if (unlikely(lower->features & feature)) + netdev_WARN(upper, "failed to disable %pNF on %s!\n", + &feature, lower->name); + } + } +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -6338,8 +6399,10 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, int __netdev_update_features(struct net_device *dev) { + struct net_device *upper, *lower; netdev_features_t features; - int err = 0; + struct list_head *iter; + int err = -1; ASSERT_RTNL(); @@ -6351,8 +6414,12 @@ int __netdev_update_features(struct net_device *dev) /* driver might be less strict about feature dependencies */ features = netdev_fix_features(dev, features); + /* some features can't be enabled if they're off on an upper device */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) + features = netdev_sync_upper_features(dev, upper, features); + if (dev->features == features) - return 0; + goto sync_lower; netdev_dbg(dev, "Features changed: %pNF -> %pNF\n", &dev->features, &features); @@ -6367,10 +6434,17 @@ int __netdev_update_features(struct net_device *dev) return -1; } +sync_lower: + /* some features must be disabled on lower devices when disabled + * on an upper device (think: bonding master or bridge) + */ + netdev_for_each_lower_dev(dev, lower, iter) + netdev_sync_lower_features(dev, lower, features); + if (!err) dev->features = features; - return 1; + return err < 0 ?
0 : 1; } /** diff --git a/net/core/dst.c b/net/core/dst.c index 0771c8cb9307..e6dc77252fe9 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -144,12 +144,12 @@ loop: mutex_unlock(&dst_gc_mutex); } -int dst_discard_sk(struct sock *sk, struct sk_buff *skb) +int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } -EXPORT_SYMBOL(dst_discard_sk); +EXPORT_SYMBOL(dst_discard_out); const u32 dst_default_metrics[RTAX_MAX + 1] = { /* This initializer is needed to force linker to place this variable @@ -177,7 +177,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, dst->xfrm = NULL; #endif dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; dst->error = 0; dst->obsolete = initial_obsolete; dst->header_len = 0; @@ -224,7 +224,7 @@ static void ___dst_free(struct dst_entry *dst) */ if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) { dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; } dst->obsolete = DST_OBSOLETE_DEAD; } @@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst) if (unlikely(newrefcnt < 0)) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } @@ -352,7 +352,7 @@ static struct dst_ops md_dst_ops = { .family = AF_UNSPEC, }; -static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb) +static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { WARN_ONCE(1, "Attempting to call output on metadata dst\n"); kfree_skb(skb); @@ -375,7 +375,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen) DST_METADATA | DST_NOCACHE | DST_NOCOUNT); dst->input = dst_md_discard; - dst->output = dst_md_discard_sk; + dst->output = dst_md_discard_out; memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); } @@ -430,7 +430,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst_discard; - dst->output = dst_discard_sk; + dst->output = dst_discard_out; } else { dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); diff --git a/net/core/filter.c b/net/core/filter.c index bb18c3680001..672eefbfbe99 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -49,16 +49,17 @@ #include <net/sch_generic.h> #include <net/cls_cgroup.h> #include <net/dst_metadata.h> +#include <net/dst.h> /** * sk_filter - run a packet through a socket filter * @sk: sock associated with &sk_buff * @skb: buffer to filter * - * Run the filter code and then cut skb->data to correct size returned by - * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller + * Run the eBPF program and then cut skb->data to correct size returned by + * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level - * wrapper to SK_RUN_FILTER. It returns 0 if the packet should + * wrapper to BPF_PROG_RUN. It returns 0 if the packet should * be accepted or -EPERM if the packet should be tossed. * */ @@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { - unsigned int pkt_len = SK_RUN_FILTER(filter, skb); + unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb); err = pkt_len ? 
pskb_trim(skb, pkt_len) : -EPERM; } @@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return raw_smp_processor_id(); } -/* note that this only generates 32-bit random numbers */ -static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) -{ - return prandom_u32(); -} - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, *insn = BPF_EMIT_CALL(__get_raw_cpu_id); break; case SKF_AD_OFF + SKF_AD_RANDOM: - *insn = BPF_EMIT_CALL(__get_random_u32); + *insn = BPF_EMIT_CALL(bpf_user_rnd_u32); + bpf_user_rnd_init_once(); break; } break; @@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp, int err; fp->bpf_func = NULL; - fp->jited = false; + fp->jited = 0; err = bpf_check_classic(fp->insns, fp->len); if (err) { @@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create); * @pfp: the unattached filter that is created * @fprog: the filter program * @trans: post-classic verifier transformation handler + * @save_orig: save classic BPF program * * This function effectively does the same as bpf_prog_create(), only * that it builds up its insns buffer from user space provided buffer. * It also allows for passing a bpf_aux_classic_check_t handler. */ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans) + bpf_aux_classic_check_t trans, bool save_orig) { unsigned int fsize = bpf_classic_proglen(fprog); struct bpf_prog *fp; + int err; /* Make sure new filter is there and in the right amounts. */ if (fprog->filter == NULL) @@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, } fp->len = fprog->len; - /* Since unattached filters are not copied back to user - * space through sk_get_filter(), we do not need to hold - * a copy here, and can spare us the work. - */ fp->orig_prog = NULL; + if (save_orig) { + err = bpf_prog_store_orig_filter(fp, fprog); + if (err) { + __bpf_prog_free(fp); + return -ENOMEM; + } + } + /* bpf_prepare_filter() already takes care of freeing * memory in case something goes wrong. 
*/ @@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - if (unlikely(!(dev->flags & IFF_UP))) - return -EINVAL; - skb2 = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb2)) return -ENOMEM; @@ -1428,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = { .arg3_type = ARG_ANYTHING, }; +struct redirect_info { + u32 ifindex; + u32 flags; +}; + +static DEFINE_PER_CPU(struct redirect_info, redirect_info); +static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + ri->ifindex = ifindex; + ri->flags = flags; + return TC_ACT_REDIRECT; +} + +int skb_do_redirect(struct sk_buff *skb) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *dev; + + dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex); + ri->ifindex = 0; + if (unlikely(!dev)) { + kfree_skb(skb); + return -EINVAL; + } + + if (BPF_IS_REDIRECT_INGRESS(ri->flags)) + return dev_forward_skb(dev, skb); + + skb->dev = dev; + skb_sender_cpu_clear(skb); + return dev_queue_xmit(skb); +} + +const struct bpf_func_proto bpf_redirect_proto = { + .func = bpf_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { return task_get_classid((struct sk_buff *) (unsigned long) r1); @@ -1440,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + const struct dst_entry *dst; + + dst = skb_dst((struct sk_buff *) (unsigned long) r1); + if (dst) + return dst->tclassid; +#endif + return 0; +} + +static const struct bpf_func_proto bpf_get_route_realm_proto = { + .func = bpf_get_route_realm, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; @@ -1580,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id) case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); + if (capable(CAP_SYS_ADMIN)) + return bpf_get_trace_printk_proto(); default: return NULL; } @@ -1608,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: return bpf_get_skb_set_tunnel_key_proto(); + case BPF_FUNC_redirect: + return &bpf_redirect_proto; + case BPF_FUNC_get_route_realm: + return &bpf_get_route_realm_proto; default: return sk_filter_func_proto(func_id); } @@ -1633,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, cb[0]) ... @@ -1649,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type) { + if (off == offsetof(struct __sk_buff, tc_classid)) + return type == BPF_WRITE ? 
true : false; + if (type == BPF_WRITE) { switch (off) { case offsetof(struct __sk_buff, mark): case offsetof(struct __sk_buff, tc_index): + case offsetof(struct __sk_buff, priority): case offsetof(struct __sk_buff, cb[0]) ... offsetof(struct __sk_buff, cb[4]): break; @@ -1665,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size, static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn_buf) + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -1694,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, case offsetof(struct __sk_buff, priority): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4); - *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, - offsetof(struct sk_buff, priority)); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sk_buff, priority)); break; case offsetof(struct __sk_buff, ingress_ifindex): @@ -1752,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); + prog->cb_access = 1; ctx_off -= offsetof(struct __sk_buff, cb[0]); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct qdisc_skb_cb, data); @@ -1761,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off); break; + case offsetof(struct __sk_buff, tc_classid): + ctx_off -= offsetof(struct __sk_buff, tc_classid); + ctx_off += offsetof(struct sk_buff, cb); + ctx_off += offsetof(struct qdisc_skb_cb, tc_classid); + WARN_ON(type != BPF_WRITE); + *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off); + break; + case offsetof(struct __sk_buff, tc_index): #ifdef CONFIG_NET_SCHED BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index dfb1a9ca0835..299cfc24d888 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -180,7 +180,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) } EXPORT_SYMBOL(lwtunnel_cmp_encap); -int lwtunnel_output(struct sock *sk, struct sk_buff *skb) +int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); const struct lwtunnel_encap_ops *ops; @@ -199,7 +199,7 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb) rcu_read_lock(); ops = rcu_dereference(lwtun_encaps[lwtstate->type]); if (likely(ops && ops->output)) - ret = ops->output(sk, skb); + ret = ops->output(net, sk, skb); rcu_read_unlock(); if (ret == -EOPNOTSUPP) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2b515ba7e94f..1aa8437ed6c4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2235,14 +2235,53 @@ static void neigh_update_notify(struct neighbour *neigh) __neigh_notify(neigh, RTM_NEWNEIGH, 0); } +static bool neigh_master_filtered(struct net_device *dev, int master_idx) +{ + struct net_device *master; + + if (!master_idx) + return false; + + master = netdev_master_upper_dev_get(dev); + if (!master || master->ifindex != master_idx) + return true; + + return false; +} + +static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx) +{ + if (filter_idx && dev->ifindex != filter_idx) + return true; + + return 
false; +} + static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + const struct nlmsghdr *nlh = cb->nlh; + struct nlattr *tb[NDA_MAX + 1]; struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; + int filter_master_idx = 0, filter_idx = 0; + unsigned int flags = NLM_F_MULTI; + int err; + + err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL); + if (!err) { + if (tb[NDA_IFINDEX]) + filter_idx = nla_get_u32(tb[NDA_IFINDEX]); + + if (tb[NDA_MASTER]) + filter_master_idx = nla_get_u32(tb[NDA_MASTER]); + + if (filter_idx || filter_master_idx) + flags |= NLM_F_DUMP_FILTERED; + } rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); @@ -2255,12 +2294,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, n = rcu_dereference_bh(n->next)) { if (!net_eq(dev_net(n->dev), net)) continue; + if (neigh_ifindex_filtered(n->dev, filter_idx)) + continue; + if (neigh_master_filtered(n->dev, filter_master_idx)) + continue; if (idx < s_idx) goto next; if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, - NLM_F_MULTI) < 0) { + flags) < 0) { rc = -1; goto out; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 830f8a7c1cb1..f88a62ab019d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -471,7 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev, if (dev_isalive(netdev)) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - int i; - - for (i = 0; i < dev->num_tx_queues; i++) - if (queue == &dev->_tx[i]) - break; + unsigned int i; + i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8bdada242a7d..94acfc89ad97 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work) * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. */ -static int poll_one_napi(struct napi_struct *napi, int budget) +static void poll_one_napi(struct napi_struct *napi) { int work = 0; @@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget) * holding the napi->poll_lock. */ if (!test_bit(NAPI_STATE_SCHED, &napi->state)) - return budget; + return; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) - goto out; + return; - work = napi->poll(napi, budget); - WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); + /* We explicitly pass the polling call a budget of 0 to + * indicate that we are clearing the Tx path only.
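+ * A budget of 0 lets the handler reap Tx completions but forbids
+ * any Rx processing, which is why a nonzero return value trips the
+ * warning below.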
+ */ + work = napi->poll(napi, 0); + WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll); trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); - -out: - return budget - work; } -static void poll_napi(struct net_device *dev, int budget) +static void poll_napi(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(napi, budget); + poll_one_napi(napi); spin_unlock(&napi->poll_lock); } } @@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); - int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev) /* Process pending work on NIC */ ops->ndo_poll_controller(dev); - poll_napi(dev, budget); + poll_napi(dev); up(&ni->dev_lock); diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c index 4eab4a94a59d..703cf76aa7c2 100644 --- a/net/core/ptp_classifier.c +++ b/net/core/ptp_classifier.c @@ -58,7 +58,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [18] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x70 ; PTP_CLASS_VLAN|PTP_CLASS_L2 + * or #0xc0 ; PTP_CLASS_VLAN|PTP_CLASS_L2 * ret a ; return PTP class * * ; PTP over UDP over IPv4 over 802.1Q over Ethernet @@ -73,7 +73,7 @@ * jneq #319, drop_8021q_ipv4 ; is port PTP_EV_PORT ? * ldh [x + 26] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x50 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 + * or #0x90 ; PTP_CLASS_VLAN|PTP_CLASS_IPV4 * ret a ; return PTP class * drop_8021q_ipv4: ret #0x0 ; PTP_CLASS_NONE * @@ -86,7 +86,7 @@ * jneq #319, drop_8021q_ipv6 ; is port PTP_EV_PORT ? 
* ldh [66] ; load payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x60 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 + * or #0xa0 ; PTP_CLASS_VLAN|PTP_CLASS_IPV6 * ret a ; return PTP class * drop_8021q_ipv6: ret #0x0 ; PTP_CLASS_NONE * @@ -98,7 +98,7 @@ * jneq #0x0, drop_ieee1588 ; for PTP_GEN_BIT and drop these * ldh [14] ; reload payload * and #0xf ; mask PTP_CLASS_VMASK - * or #0x30 ; PTP_CLASS_L2 + * or #0x40 ; PTP_CLASS_L2 * ret a ; return PTP class * drop_ieee1588: ret #0x0 ; PTP_CLASS_NONE */ @@ -150,7 +150,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 35, 0x00000000 }, { 0x28, 0, 0, 0x00000012 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000070 }, + { 0x44, 0, 0, 0x000000c0 }, { 0x16, 0, 0, 0x00000000 }, { 0x15, 0, 12, 0x00000800 }, { 0x30, 0, 0, 0x0000001b }, @@ -162,7 +162,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x48, 0, 0, 0x0000001a }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000050 }, + { 0x44, 0, 0, 0x00000090 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 8, 0x000086dd }, @@ -172,7 +172,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x0000013f }, { 0x28, 0, 0, 0x00000042 }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000060 }, + { 0x44, 0, 0, 0x000000a0 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, { 0x15, 0, 7, 0x000088f7 }, @@ -181,7 +181,7 @@ void __init ptp_classifier_init(void) { 0x15, 0, 4, 0x00000000 }, { 0x28, 0, 0, 0x0000000e }, { 0x54, 0, 0, 0x0000000f }, - { 0x44, 0, 0, 0x00000030 }, + { 0x44, 0, 0, 0x00000040 }, { 0x16, 0, 0, 0x00000000 }, { 0x06, 0, 0, 0x00000000 }, }; diff --git a/net/core/request_sock.c b/net/core/request_sock.c index b42f0e26f89e..5d26056b6d8f 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -37,90 +37,16 @@ int sysctl_max_syn_backlog = 256; EXPORT_SYMBOL(sysctl_max_syn_backlog); -int reqsk_queue_alloc(struct request_sock_queue *queue, - unsigned int nr_table_entries) +void reqsk_queue_alloc(struct request_sock_queue *queue) { - size_t lopt_size = sizeof(struct listen_sock); - struct listen_sock *lopt = NULL; + spin_lock_init(&queue->rskq_lock); - nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); - nr_table_entries = max_t(u32, nr_table_entries, 8); - nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); - lopt_size += nr_table_entries * sizeof(struct request_sock *); + spin_lock_init(&queue->fastopenq.lock); + queue->fastopenq.rskq_rst_head = NULL; + queue->fastopenq.rskq_rst_tail = NULL; + queue->fastopenq.qlen = 0; - if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) - lopt = kzalloc(lopt_size, GFP_KERNEL | - __GFP_NOWARN | - __GFP_NORETRY); - if (!lopt) - lopt = vzalloc(lopt_size); - if (!lopt) - return -ENOMEM; - - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); - spin_lock_init(&queue->syn_wait_lock); queue->rskq_accept_head = NULL; - lopt->nr_table_entries = nr_table_entries; - lopt->max_qlen_log = ilog2(nr_table_entries); - - spin_lock_bh(&queue->syn_wait_lock); - queue->listen_opt = lopt; - spin_unlock_bh(&queue->syn_wait_lock); - - return 0; -} - -void __reqsk_queue_destroy(struct request_sock_queue *queue) -{ - /* This is an error recovery path only, no locking needed */ - kvfree(queue->listen_opt); -} - -static inline struct listen_sock *reqsk_queue_yank_listen_sk( - struct request_sock_queue *queue) -{ - struct listen_sock *lopt; - - spin_lock_bh(&queue->syn_wait_lock); - lopt = queue->listen_opt; - queue->listen_opt = NULL; - spin_unlock_bh(&queue->syn_wait_lock); - - 
return lopt; -} - -void reqsk_queue_destroy(struct request_sock_queue *queue) -{ - /* make all the listen_opt local to us */ - struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); - - if (listen_sock_qlen(lopt) != 0) { - unsigned int i; - - for (i = 0; i < lopt->nr_table_entries; i++) { - struct request_sock *req; - - spin_lock_bh(&queue->syn_wait_lock); - while ((req = lopt->syn_table[i]) != NULL) { - lopt->syn_table[i] = req->dl_next; - /* Because of following del_timer_sync(), - * we must release the spinlock here - * or risk a dead lock. - */ - spin_unlock_bh(&queue->syn_wait_lock); - atomic_inc(&lopt->qlen_dec); - if (del_timer_sync(&req->rsk_timer)) - reqsk_put(req); - reqsk_put(req); - spin_lock_bh(&queue->syn_wait_lock); - } - spin_unlock_bh(&queue->syn_wait_lock); - } - } - - if (WARN_ON(listen_sock_qlen(lopt) != 0)) - pr_err("qlen %u\n", listen_sock_qlen(lopt)); - kvfree(lopt); } /* @@ -174,7 +100,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, struct sock *lsk = req->rsk_listener; struct fastopen_queue *fastopenq; - fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq; + fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq; tcp_sk(sk)->fastopen_rsk = NULL; spin_lock_bh(&fastopenq->lock); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0ec48403ed68..504bd17b7456 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -96,7 +96,7 @@ int rtnl_is_locked(void) EXPORT_SYMBOL(rtnl_is_locked); #ifdef CONFIG_PROVE_LOCKING -int lockdep_rtnl_is_held(void) +bool lockdep_rtnl_is_held(void) { return lockdep_is_held(&rtnl_mutex); } @@ -497,7 +497,8 @@ void rtnl_af_unregister(struct rtnl_af_ops *ops) } EXPORT_SYMBOL_GPL(rtnl_af_unregister); -static size_t rtnl_link_get_af_size(const struct net_device *dev) +static size_t rtnl_link_get_af_size(const struct net_device *dev, + u32 ext_filter_mask) { struct rtnl_af_ops *af_ops; size_t size; @@ -509,7 +510,7 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev) if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + - af_ops->get_link_af_size(dev); + af_ops->get_link_af_size(dev, ext_filter_mask); } } @@ -837,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, /* IFLA_VF_STATS_BROADCAST */ nla_total_size(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ - nla_total_size(sizeof(__u64))); + nla_total_size(sizeof(__u64)) + + nla_total_size(sizeof(struct ifla_vf_trust))); return size; } else return 0; @@ -900,7 +902,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ - + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */ + + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ @@ -1025,7 +1027,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev) { int err; struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1160,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_link_state vf_linkstate; struct ifla_vf_rss_query_en vf_rss_query_en; struct ifla_vf_stats 
vf_stats; + struct ifla_vf_trust vf_trust; /* * Not all SR-IOV capable drivers support the @@ -1169,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; ivi.rss_query_en = -1; + ivi.trusted = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1182,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = - vf_rss_query_en.vf = ivi.vf; + vf_rss_query_en.vf = + vf_trust.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1193,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; + vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1210,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, &vf_linkstate) || nla_put(skb, IFLA_VF_RSS_QUERY_EN, sizeof(vf_rss_query_en), - &vf_rss_query_en)) + &vf_rss_query_en) || + nla_put(skb, IFLA_VF_TRUST, + sizeof(vf_trust), &vf_trust)) goto nla_put_failure; memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) @@ -1272,7 +1280,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (!(af = nla_nest_start(skb, af_ops->family))) goto nla_put_failure; - err = af_ops->fill_link_af(skb, dev); + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); /* * Caller may return ENODATA to indicate that there @@ -1347,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, [IFLA_VF_STATS] = { .type = NLA_NESTED }, + [IFLA_VF_TRUST] = { .len = sizeof(struct ifla_vf_trust) }, }; static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = { @@ -1586,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } + if (tb[IFLA_VF_TRUST]) { + struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_trust) + err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); + if (err < 0) + return err; + } + return err; } @@ -3443,4 +3462,3 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } - diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fab4599ba8b2..aa41e6dd6429 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -414,7 +414,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, len += NET_SKB_PAD; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || - (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; @@ -481,7 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, len += NET_SKB_PAD + NET_IP_ALIGN; if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) || - (gfp_mask & (__GFP_WAIT | GFP_DMA))) { + (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); if (!skb) goto skb_fail; @@ -4452,7 +4452,7 @@ struct sk_buff 
*alloc_skb_with_frags(unsigned long header_len, return NULL; gfp_head = gfp_mask; - if (gfp_head & __GFP_WAIT) + if (gfp_head & __GFP_DIRECT_RECLAIM) gfp_head |= __GFP_REPEAT; *errcode = -ENOBUFS; @@ -4467,7 +4467,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len, while (order) { if (npages >= 1 << order) { - page = alloc_pages((gfp_mask & ~__GFP_WAIT) | + page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, diff --git a/net/core/sock.c b/net/core/sock.c index 3307c02244d3..1e4dd54bfb5a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -422,13 +422,25 @@ static void sock_warn_obsolete_bsdism(const char *name) } } +static bool sock_needs_netstamp(const struct sock *sk) +{ + switch (sk->sk_family) { + case AF_UNSPEC: + case AF_UNIX: + return false; + default: + return true; + } +} + #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) static void sock_disable_timestamp(struct sock *sk, unsigned long flags) { if (sk->sk_flags & flags) { sk->sk_flags &= ~flags; - if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(sk->sk_flags & SK_FLAGS_TIMESTAMP)) net_disable_timestamp(); } } @@ -988,6 +1000,10 @@ set_rcvbuf: sk->sk_max_pacing_rate); break; + case SO_INCOMING_CPU: + sk->sk_incoming_cpu = val; + break; + default: ret = -ENOPROTOOPT; break; @@ -1578,7 +1594,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) if (newsk->sk_prot->sockets_allocated) sk_sockets_allocated_inc(newsk); - if (newsk->sk_flags & SK_FLAGS_TIMESTAMP) + if (sock_needs_netstamp(sk) && + newsk->sk_flags & SK_FLAGS_TIMESTAMP) net_enable_timestamp(); } out: @@ -1639,6 +1656,28 @@ void sock_wfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_wfree); +void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + skb_orphan(skb); + skb->sk = sk; +#ifdef CONFIG_INET + if (unlikely(!sk_fullsock(sk))) { + skb->destructor = sock_edemux; + sock_hold(sk); + return; + } +#endif + skb->destructor = sock_wfree; + skb_set_hash_from_sk(skb, sk); + /* + * We used to take a refcount on sk, but the following operation + * is enough to guarantee sk_free() won't free this sock until + * all in-flight packets are completed + */ + atomic_add(skb->truesize, &sk->sk_wmem_alloc); +} +EXPORT_SYMBOL(skb_set_owner_w); + void skb_orphan_partial(struct sk_buff *skb) { /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, @@ -1852,6 +1891,32 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +int sock_cmsg_send(struct sock *sk, struct msghdr *msg, + struct sockcm_cookie *sockc) +{ + struct cmsghdr *cmsg; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_SOCKET) + continue; + switch (cmsg->cmsg_type) { + case SO_MARK: + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + sockc->mark = *(u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL(sock_cmsg_send); + /* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) @@ -1879,8 +1944,10 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) pfrag->offset = 0; if (SKB_FRAG_PAGE_ORDER) { - pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP | - __GFP_NOWARN | __GFP_NORETRY, + /* Avoid direct reclaim but allow kswapd to wake */ +
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | + __GFP_COMP | __GFP_NOWARN | + __GFP_NORETRY, SKB_FRAG_PAGE_ORDER); if (likely(pfrag->page)) { pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER; @@ -2353,6 +2420,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -2479,7 +2547,8 @@ void sock_enable_timestamp(struct sock *sk, int flag) * time stamping, but time stamping might have been on * already because of the other one */ - if (!(previous_flags & SK_FLAGS_TIMESTAMP)) + if (sock_needs_netstamp(sk) && + !(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } @@ -2758,7 +2827,7 @@ static int req_prot_init(const struct proto *prot) rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, - 0, NULL); + prot->slab_flags, NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 817622f3dbb7..0c1d58d43f67 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -1,3 +1,5 @@ +/* License: GPL */ + #include <linux/mutex.h> #include <linux/socket.h> #include <linux/skbuff.h> @@ -323,14 +325,4 @@ static int __init sock_diag_init(void) BUG_ON(!broadcast_wq); return register_pernet_subsys(&diag_net_ops); } - -static void __exit sock_diag_exit(void) -{ - unregister_pernet_subsys(&diag_net_ops); - destroy_workqueue(broadcast_wq); -} - -module_init(sock_diag_init); -module_exit(sock_diag_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG); +device_initcall(sock_diag_init); diff --git a/net/core/tso.c b/net/core/tso.c index 630b30b4fb53..5dca7ce8ee9f 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,4 +1,5 @@ #include <linux/export.h> +#include <linux/if_vlan.h> #include <net/ip.h> #include <net/tso.h> #include <asm/unaligned.h> @@ -14,18 +15,24 @@ EXPORT_SYMBOL(tso_count_descs); void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last) { - struct iphdr *iph; struct tcphdr *tcph; int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); int mac_hdr_len = skb_network_offset(skb); memcpy(hdr, skb->data, hdr_len); - iph = (struct iphdr *)(hdr + mac_hdr_len); - iph->id = htons(tso->ip_id); - iph->tot_len = htons(size + hdr_len - mac_hdr_len); + if (!tso->ipv6) { + struct iphdr *iph = (void *)(hdr + mac_hdr_len); + + iph->id = htons(tso->ip_id); + iph->tot_len = htons(size + hdr_len - mac_hdr_len); + tso->ip_id++; + } else { + struct ipv6hdr *iph = (void *)(hdr + mac_hdr_len); + + iph->payload_len = htons(size + tcp_hdrlen(skb)); + } tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb)); put_unaligned_be32(tso->tcp_seq, &tcph->seq); - tso->ip_id++; if (!is_last) { /* Clear all special flags for not last packet */ @@ -61,6 +68,7 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) tso->ip_id = ntohs(ip_hdr(skb)->id); tso->tcp_seq = ntohl(tcp_hdr(skb)->seq); tso->next_frag_idx = 0; + tso->ipv6 = vlan_get_protocol(skb) == htons(ETH_P_IPV6); /* Build first data */ tso->size = skb_headlen(skb) - hdr_len; diff --git a/net/core/utils.c b/net/core/utils.c index 3dffce953c39..3d17ca8b4744 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, } } 
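The tso.c hunk above makes the generic software-TSO helpers dual-stack: tso_start() now records whether the flow is IPv6 (via vlan_get_protocol()) and tso_build_hdr() then patches the IPv6 payload_len instead of the IPv4 id/tot_len pair. For orientation, a driver consumes these helpers roughly as in the minimal sketch below, written against the net/tso.h API in this tree; hw_put_hdr(), hw_put_frag() and EX_TSO_HDR_LEN are made-up placeholders for the driver's own descriptor handling, not a real API.

#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/tcp.h>
#include <net/tso.h>

#define EX_TSO_HDR_LEN 256	/* hypothetical per-segment header buffer */

/* Stand-ins for a real driver's descriptor-queue writes. */
static void hw_put_hdr(const char *hdr, int len) { /* header descriptor */ }
static void hw_put_frag(const void *data, int len) { /* payload descriptor */ }

static void example_tso_xmit(struct sk_buff *skb, int mss)
{
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	int remaining = skb->len - hdr_len;
	char hdr[EX_TSO_HDR_LEN];
	struct tso_t tso;

	/* Primes tso.data/tso.size and latches ip_id/tcp_seq/ipv6 */
	tso_start(skb, &tso);

	while (remaining > 0) {
		int seg = min(mss, remaining);
		bool last = (seg == remaining);

		remaining -= seg;

		/* Re-emit the headers for this segment: IPv4 gets id and
		 * tot_len fixed up, IPv6 (new above) gets payload_len.
		 */
		tso_build_hdr(skb, hdr, &tso, seg, last);
		hw_put_hdr(hdr, hdr_len);

		while (seg > 0) {
			int chunk = min_t(int, seg, tso.size);

			hw_put_frag(tso.data, chunk);
			seg -= chunk;
			/* Advance to the next payload chunk or frag */
			tso_build_data(skb, &tso, chunk);
		}
	}
}

The in-tree users of these helpers (mvneta, mv643xx_eth) follow essentially this header/payload split per emitted segment.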
EXPORT_SYMBOL(inet_proto_csum_replace_by_diff); - -struct __net_random_once_work { - struct work_struct work; - struct static_key *key; -}; - -static void __net_random_once_deferred(struct work_struct *w) -{ - struct __net_random_once_work *work = - container_of(w, struct __net_random_once_work, work); - BUG_ON(!static_key_enabled(work->key)); - static_key_slow_dec(work->key); - kfree(work); -} - -static void __net_random_once_disable_jump(struct static_key *key) -{ - struct __net_random_once_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (!w) - return; - - INIT_WORK(&w->work, __net_random_once_deferred); - w->key = key; - schedule_work(&w->work); -} - -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key) -{ - static DEFINE_SPINLOCK(lock); - unsigned long flags; - - spin_lock_irqsave(&lock, flags); - if (*done) { - spin_unlock_irqrestore(&lock, flags); - return false; - } - - get_random_bytes(buf, nbytes); - *done = true; - spin_unlock_irqrestore(&lock, flags); - - __net_random_once_disable_jump(once_key); - - return true; -} -EXPORT_SYMBOL(__net_get_random_once); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 5b21f6f88e97..4f6c1862dfd2 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -13,6 +13,7 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, see <http://www.gnu.org/licenses/>. * + * Description: Data Center Bridging netlink interface * Author: Lucy Liu <lucy.liu@intel.com> */ @@ -24,7 +25,7 @@ #include <linux/dcbnl.h> #include <net/dcbevent.h> #include <linux/rtnetlink.h> -#include <linux/module.h> +#include <linux/init.h> #include <net/sock.h> /* Data Center Bridging (DCB) is a collection of Ethernet enhancements @@ -48,10 +49,6 @@ * features for capable devices. 
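* With dcbnl now built in unconditionally, registration happens once at
* boot via device_initcall() and the module boilerplate below is dropped.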
*/ -MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>"); -MODULE_DESCRIPTION("Data Center Bridging netlink interface"); -MODULE_LICENSE("GPL"); - /**************** DCB attribute policies *************************************/ /* DCB netlink attributes policy */ @@ -1935,19 +1932,6 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) } EXPORT_SYMBOL(dcb_ieee_delapp); -static void dcb_flushapp(void) -{ - struct dcb_app_type *app; - struct dcb_app_type *tmp; - - spin_lock_bh(&dcb_lock); - list_for_each_entry_safe(app, tmp, &dcb_app_list, list) { - list_del(&app->list); - kfree(app); - } - spin_unlock_bh(&dcb_lock); -} - static int __init dcbnl_init(void) { INIT_LIST_HEAD(&dcb_app_list); @@ -1957,12 +1941,4 @@ static int __init dcbnl_init(void) return 0; } -module_init(dcbnl_init); - -static void __exit dcbnl_exit(void) -{ - rtnl_unregister(PF_UNSPEC, RTM_GETDCB); - rtnl_unregister(PF_UNSPEC, RTM_SETDCB); - dcb_flushapp(); -} -module_exit(dcbnl_exit); +device_initcall(dcbnl_init); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index bebc735f5afc..b0e28d24e1a7 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -229,7 +229,7 @@ void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb); int dccp_retransmit_skb(struct sock *sk); void dccp_send_ack(struct sock *sk); -void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *rsk); void dccp_send_sync(struct sock *sk, const u64 seq, @@ -270,15 +270,17 @@ int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); -struct sock *dccp_create_openreq_child(struct sock *sk, +struct sock *dccp_create_openreq_child(const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb); int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); -struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, +struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst); + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req); struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -293,7 +295,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized); void dccp_destroy_sock(struct sock *sk); void dccp_close(struct sock *sk, long timeout); -struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, +struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst, struct request_sock *req); int dccp_connect(struct sock *sk); @@ -325,13 +327,13 @@ void dccp_send_close(struct sock *sk, const int active); int dccp_invalid_packet(struct sk_buff *skb); u32 dccp_sample_rtt(struct sock *sk, long delta); -static inline int dccp_bad_service_code(const struct sock *sk, +static inline bool dccp_bad_service_code(const struct sock *sk, const __be32 service) { const struct dccp_sock *dp = dccp_sk(sk); if (dp->dccps_service == service) - return 0; + return false; return !dccp_list_has_service(dp->dccps_service_list, service); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index ccf4c5629b3c..5684e14932bd 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq) if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - reqsk_put(req); } else { /* * Still in 
RESPOND, just remove it silently. @@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq) */ inet_csk_reqsk_queue_drop(req->rsk_listener, req); } + reqsk_put(req); } EXPORT_SYMBOL(dccp_req_err); @@ -390,9 +390,12 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) * * This is the equivalent of TCP's tcp_v4_syn_recv_sock */ -struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, +struct sock *dccp_v4_request_recv_sock(const struct sock *sk, + struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; struct inet_sock *newinet; @@ -425,7 +428,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - __inet_hash_nolisten(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); return newsk; @@ -443,36 +446,6 @@ put_and_exit: } EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); -static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - const struct iphdr *iph = ip_hdr(skb); - struct sock *nsk; - /* Find possible connection requests. */ - struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, - iph->saddr, iph->daddr); - if (req) { - nsk = dccp_check_req(sk, skb, req); - if (!nsk) - reqsk_put(req); - return nsk; - } - nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, - inet_iif(skb)); - if (nsk != NULL) { - if (nsk->sk_state != DCCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - inet_twsk_put(inet_twsk(nsk)); - return NULL; - } - - return sk; -} - static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -498,7 +471,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, return &rt->dst; } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; @@ -527,7 +500,7 @@ out: return err; } -static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) { int err; const struct iphdr *rxiph; @@ -624,7 +597,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp_request_sock_ops, sk); + req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true); if (req == NULL) goto drop; @@ -704,18 +677,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) * NOTE: the check for the packet types is done in * dccp_rcv_state_process */ - if (sk->sk_state == DCCP_LISTEN) { - struct sock *nsk = dccp_v4_hnd_req(sk, skb); - - if (nsk == NULL) - goto discard; - - if (nsk != sk) { - if (dccp_child_process(sk, nsk, skb)) - goto reset; - return 0; - } - } if (dccp_rcv_state_process(sk, skb, dh, skb->len)) goto reset; @@ -723,7 +684,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) reset: dccp_v4_ctl_send_reset(sk, skb); -discard: kfree_skb(skb); return 0; } @@ -841,15 +801,10 @@ static int dccp_v4_rcv(struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); } - /* Step 2: - * Look up flow ID in table and get corresponding socket */ +lookup: sk = 
__inet_lookup_skb(&dccp_hashinfo, skb, dh->dccph_sport, dh->dccph_dport); - /* - * Step 2: - * If no socket ... - */ - if (sk == NULL) { + if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); goto no_dccp_socket; @@ -867,6 +822,31 @@ static int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } + if (sk->sk_state == DCCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + struct sock *nsk = NULL; + + sk = req->rsk_listener; + if (likely(sk->sk_state == DCCP_LISTEN)) { + nsk = dccp_check_req(sk, skb, req); + } else { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + if (!nsk) { + reqsk_put(req); + goto discard_it; + } + if (nsk == sk) { + sock_hold(sk); + reqsk_put(req); + } else if (dccp_child_process(sk, nsk, skb)) { + dccp_v4_ctl_send_reset(sk, skb); + goto discard_it; + } else { + return 0; + } + } /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage * o if MinCsCov = 0, only packets with CsCov = 0 are accepted diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5165571f397a..db5fc2440a23 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -181,7 +181,7 @@ out: } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) +static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -234,7 +234,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req) kfree_skb(inet_rsk(req)->pktopts); } -static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) +static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; @@ -290,37 +290,6 @@ static struct request_sock_ops dccp6_request_sock_ops = { .syn_ack_timeout = dccp_syn_ack_timeout, }; -static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - const struct ipv6hdr *iph = ipv6_hdr(skb); - struct request_sock *req; - struct sock *nsk; - - req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, - &iph->daddr, inet6_iif(skb)); - if (req) { - nsk = dccp_check_req(sk, skb, req); - if (!nsk) - reqsk_put(req); - return nsk; - } - nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, - &iph->saddr, dh->dccph_sport, - &iph->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); - if (nsk != NULL) { - if (nsk->sk_state != DCCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - inet_twsk_put(inet_twsk(nsk)); - return NULL; - } - - return sk; -} - static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct request_sock *req; @@ -350,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; - req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk); + req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); if (req == NULL) goto drop; @@ -398,7 +367,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (dccp_v6_send_response(sk, req)) goto drop_and_free; - inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); return 0; drop_and_free: @@ -408,13 +377,16 @@ drop: return -1; } -static struct sock *dccp_v6_request_recv_sock(struct sock *sk, +static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) 
+ struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_pinfo *newnp; + const struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -423,7 +395,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, /* * v6 mapped */ - newsk = dccp_v4_request_recv_sock(sk, skb, req, dst); + newsk = dccp_v4_request_recv_sock(sk, skb, req, dst, + req_unhash, own_req); if (newsk == NULL) return NULL; @@ -462,22 +435,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (dst == NULL) { - struct in6_addr *final_p, final; + if (!dst) { struct flowi6 fl6; - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_DCCP; - fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); - fl6.saddr = ireq->ir_v6_loc_addr; - fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.fl6_dport = ireq->ir_rmt_port; - fl6.fl6_sport = htons(ireq->ir_num); - security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - - dst = ip6_dst_lookup_flow(sk, &fl6, final_p); - if (IS_ERR(dst)) + dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP); + if (!dst) goto out; } @@ -515,15 +477,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; - /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts != NULL) { - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); - } newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -552,7 +506,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, dccp_done(newsk); goto out; } - __inet_hash(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + /* Clone pktoptions received with SYN, if we own the req */ + if (*own_req && ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } return newsk; @@ -651,24 +613,6 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * NOTE: the check for the packet types is done in * dccp_rcv_state_process */ - if (sk->sk_state == DCCP_LISTEN) { - struct sock *nsk = dccp_v6_hnd_req(sk, skb); - - if (nsk == NULL) - goto discard; - /* - * Queue it on the new socket if the new socket is active, - * otherwise we just shortcircuit this and continue with - * the new socket.. - */ - if (nsk != sk) { - if (dccp_child_process(sk, nsk, skb)) - goto reset; - if (opt_skb != NULL) - __kfree_skb(opt_skb); - return 0; - } - } if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; @@ -715,16 +659,11 @@ static int dccp_v6_rcv(struct sk_buff *skb) else DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - /* Step 2: - * Look up flow ID in table and get corresponding socket */ +lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, dh->dccph_sport, dh->dccph_dport, inet6_iif(skb)); - /* - * Step 2: - * If no socket ... 
- */ - if (sk == NULL) { + if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); goto no_dccp_socket; @@ -742,6 +681,31 @@ static int dccp_v6_rcv(struct sk_buff *skb) goto no_dccp_socket; } + if (sk->sk_state == DCCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + struct sock *nsk = NULL; + + sk = req->rsk_listener; + if (likely(sk->sk_state == DCCP_LISTEN)) { + nsk = dccp_check_req(sk, skb, req); + } else { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + if (!nsk) { + reqsk_put(req); + goto discard_it; + } + if (nsk == sk) { + sock_hold(sk); + reqsk_put(req); + } else if (dccp_child_process(sk, nsk, skb)) { + dccp_v6_ctl_send_reset(sk, skb); + goto discard_it; + } else { + return 0; + } + } /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage * o if MinCsCov = 0, only packets with CsCov = 0 are accepted diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 838f524cf11a..1994f8af646b 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -72,7 +72,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) dccp_done(sk); } -struct sock *dccp_create_openreq_child(struct sock *sk, +struct sock *dccp_create_openreq_child(const struct sock *sk, const struct request_sock *req, const struct sk_buff *skb) { @@ -143,6 +143,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, { struct sock *child = NULL; struct dccp_request_sock *dreq = dccp_rsk(req); + bool own_req; /* Check for retransmitted REQUEST */ if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { @@ -182,14 +183,13 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, dreq, skb)) goto drop; - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); - if (child == NULL) + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + req, &own_req); + if (!child) goto listen_overflow; - inet_csk_reqsk_queue_drop(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); -out: - return child; + return inet_csk_complete_hashdance(sk, child, req, own_req); + listen_overflow: dccp_pr_debug("listen_overflow!\n"); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; @@ -198,7 +198,7 @@ drop: req->rsk_ops->send_reset(sk, skb); inet_csk_reqsk_queue_drop(sk, req); - goto out; + return NULL; } EXPORT_SYMBOL_GPL(dccp_check_req); @@ -236,7 +236,7 @@ int dccp_child_process(struct sock *parent, struct sock *child, EXPORT_SYMBOL_GPL(dccp_child_process); -void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *rsk) { DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state"); diff --git a/net/dccp/output.c b/net/dccp/output.c index 0248e8a3460c..4ce912e691d0 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -390,7 +390,7 @@ int dccp_retransmit_skb(struct sock *sk) return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC)); } -struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, +struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; @@ -398,13 +398,18 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, const u32 dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); - struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, - GFP_ATOMIC); - if (skb == NULL) + struct 
sk_buff *skb; + + /* sk is marked const to clearly express we don't hold the socket lock. + * sock_wmalloc() will atomically change sk->sk_wmem_alloc, + * so it is safe to promote sk to non-const. + */ + skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1, + GFP_ATOMIC); + if (!skb) return NULL; - /* Reserve space for headers. */ - skb_reserve(skb, sk->sk_prot->max_header); + skb_reserve(skb, MAX_DCCP_HEADER); skb_dst_set(skb, dst_clone(dst)); diff --git a/net/dccp/probe.c b/net/dccp/probe.c index d8346d0eadeb..3d3fda05b32d 100644 --- a/net/dccp/probe.c +++ b/net/dccp/probe.c @@ -30,6 +30,7 @@ #include <linux/module.h> #include <linux/kfifo.h> #include <linux/vmalloc.h> +#include <linux/time64.h> #include <linux/gfp.h> #include <net/net_namespace.h> @@ -47,20 +48,20 @@ static struct { struct kfifo fifo; spinlock_t lock; wait_queue_head_t wait; - struct timespec tstart; + struct timespec64 tstart; } dccpw; static void printl(const char *fmt, ...) { va_list args; int len; - struct timespec now; + struct timespec64 now; char tbuf[256]; va_start(args, fmt); - getnstimeofday(&now); + getnstimeofday64(&now); - now = timespec_sub(now, dccpw.tstart); + now = timespec64_sub(now, dccpw.tstart); len = sprintf(tbuf, "%lu.%06lu ", (unsigned long) now.tv_sec, @@ -110,7 +111,7 @@ static struct jprobe dccp_send_probe = { static int dccpprobe_open(struct inode *inode, struct file *file) { kfifo_reset(&dccpw.fifo); - getnstimeofday(&dccpw.tstart); + getnstimeofday64(&dccpw.tstart); return 0; } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 4507b188fc51..482730cd8a56 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) return err; } -static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb) +static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -246,8 +246,9 @@ static int dn_long_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } /* @@ -286,8 +287,9 @@ static int dn_short_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } /* @@ -327,11 +329,12 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, - NULL, neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, + &init_net, sk, skb, NULL, neigh->dev, + dn_neigh_output_packet); } -int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) +int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; @@ -375,7 +378,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb) /* * Ethernet router hello message received */ -int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) +int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff
*skb) { struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; @@ -437,7 +440,7 @@ int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) /* * Endnode hello message received */ -int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb) +int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; struct neighbour *neigh; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index a321eac9fd0c..7ac086d5c0c0 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -714,7 +714,8 @@ out: return ret; } -static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb) +static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, + struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk = NULL; @@ -814,8 +815,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, + &init_net, NULL, skb, skb->dev, NULL, dn_nsp_rx_packet); } diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 1aaa51ebbda6..849805e7af52 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -85,7 +85,7 @@ static void dn_nsp_send(struct sk_buff *skb) if (dst) { try_again: skb_dst_set(skb, dst); - dst_output(skb); + dst_output(&init_net, skb->sk, skb); return; } @@ -582,7 +582,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, * associations. */ skb_dst_set(skb, dst_clone(dst)); - dst_output(skb); + dst_output(&init_net, skb->sk, skb); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 03227ffd19ce..607a14f20d88 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb) * * Returns: result of input function if route is found, error code otherwise */ -static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb) +static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb; int err; @@ -573,8 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, + &init_net, NULL, skb, skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -601,8 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, + &init_net, NULL, skb, skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -610,7 +610,7 @@ drop_it: return NET_RX_DROP; } -static int dn_route_discard(struct sock *sk, struct sk_buff *skb) +static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb) { /* * I know we drop the packet here, but thats considered success in @@ -620,7 +620,7 @@ static int dn_route_discard(struct sock *sk, struct sk_buff *skb) return NET_RX_SUCCESS; } -static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb) +static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb) { dn_dev_hello(skb); dn_neigh_pointopoint_hello(skb); @@ -706,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch (flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: 
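/* DECnet has never been made namespace-aware, so each hook invocation below passes the explicit &init_net required by the updated NF_HOOK() signature. */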
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_route_discard); case DN_RT_PKT_ERTH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_neigh_router_hello); case DN_RT_PKT_EEDH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - NULL, skb, skb->dev, NULL, + &init_net, NULL, skb, skb->dev, NULL, dn_neigh_endnode_hello); } } else { @@ -744,7 +744,7 @@ out: return NET_RX_DROP; } -static int dn_output(struct sock *sk, struct sk_buff *skb) +static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -770,8 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb, - NULL, dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, + &init_net, sk, skb, NULL, dev, dn_to_neigh_output); error: @@ -789,9 +789,7 @@ static int dn_forward(struct sk_buff *skb) struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr); struct dn_route *rt; int header_len; -#ifdef CONFIG_NETFILTER struct net_device *dev = skb->dev; -#endif if (skb->pkt_type != PACKET_HOST) goto drop; @@ -819,8 +817,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb, - dev, skb->dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, + &init_net, NULL, skb, dev, skb->dev, dn_to_neigh_output); drop: @@ -832,7 +830,7 @@ drop: * Used to catch bugs. This should never normally get * called. */ -static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb) +static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); @@ -1469,7 +1467,7 @@ make_route: rt->n = neigh; rt->dst.lastuse = jiffies; - rt->dst.output = dn_rt_bug_sk; + rt->dst.output = dn_rt_bug_out; switch (res.type) { case RTN_UNICAST: rt->dst.input = dn_forward; diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index af34fc9bdf69..85f2fdc360c2 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) } -static unsigned int dnrmg_hook(const struct nf_hook_ops *ops, +static unsigned int dnrmg_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 31cd4fd75486..c79b85eb4d4c 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -122,7 +122,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) goto bad_option_value; kdebug("dns error no. 
= %lu", derrno); - prep->type_data[0] = ERR_PTR(-derrno); + prep->payload.data[dns_key_error] = ERR_PTR(-derrno); continue; } @@ -137,8 +137,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) /* don't cache the result if we're caching an error saying there's no * result */ - if (prep->type_data[0]) { - kleave(" = 0 [h_error %ld]", PTR_ERR(prep->type_data[0])); + if (prep->payload.data[dns_key_error]) { + kleave(" = 0 [h_error %ld]", PTR_ERR(prep->payload.data[dns_key_error])); return 0; } @@ -155,7 +155,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep) memcpy(upayload->data, data, result_len); upayload->data[result_len] = '\0'; - prep->payload[0] = upayload; + prep->payload.data[dns_key_data] = upayload; kleave(" = 0"); return 0; } @@ -167,7 +167,7 @@ static void dns_resolver_free_preparse(struct key_preparsed_payload *prep) { pr_devel("==>%s()\n", __func__); - kfree(prep->payload[0]); + kfree(prep->payload.data[dns_key_data]); } /* @@ -223,10 +223,10 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) */ static void dns_resolver_describe(const struct key *key, struct seq_file *m) { - int err = key->type_data.x[0]; - seq_puts(m, key->description); if (key_is_instantiated(key)) { + int err = PTR_ERR(key->payload.data[dns_key_error]); + if (err) seq_printf(m, ": %d", err); else @@ -241,8 +241,10 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) static long dns_resolver_read(const struct key *key, char __user *buffer, size_t buflen) { - if (key->type_data.x[0]) - return key->type_data.x[0]; + int err = PTR_ERR(key->payload.data[dns_key_error]); + + if (err) + return err; return user_read(key, buffer, buflen); } diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 39d2c39bdf87..4677b6fa6dda 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -70,7 +70,7 @@ int dns_query(const char *type, const char *name, size_t namelen, const char *options, char **_result, time_t *_expiry) { struct key *rkey; - struct user_key_payload *upayload; + const struct user_key_payload *upayload; const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; @@ -137,12 +137,11 @@ int dns_query(const char *type, const char *name, size_t namelen, goto put; /* If the DNS server gave an error, return that to the caller */ - ret = rkey->type_data.x[0]; + ret = PTR_ERR(rkey->payload.data[dns_key_error]); if (ret) goto put; - upayload = rcu_dereference_protected(rkey->payload.data, - lockdep_is_held(&rkey->sem)); + upayload = user_key_payload(rkey); len = upayload->datalen; ret = -ENOMEM; diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h index 7af1ed39c009..0c570d40e4d6 100644 --- a/net/dns_resolver/internal.h +++ b/net/dns_resolver/internal.h @@ -23,6 +23,14 @@ #include <linux/sched.h> /* + * Layout of key payload words. 
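+ * data[dns_key_data] holds the cached lookup result; data[dns_key_error]
+ * holds an ERR_PTR-encoded server error, as set up in dns_resolver_preparse().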
+ */ +enum { + dns_key_data, + dns_key_error, +}; + +/* * dns_key.c */ extern const struct cred *dns_resolver_cache; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index adb5325f4934..1eba07feb34a 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -327,8 +327,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) ret = dsa_slave_create(ds, parent, i, pd->port_names[i]); if (ret < 0) { - netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n", - index, i, pd->port_names[i]); + netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n", + index, i, pd->port_names[i], ret); ret = 0; } } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7d91f4612ac0..7bc787b095c8 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -242,17 +242,15 @@ static int dsa_bridge_check_vlan_range(struct dsa_switch *ds, } static int dsa_slave_port_vlan_add(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_port_vlan *vlan, + struct switchdev_trans *trans) { - struct switchdev_obj_vlan *vlan = &obj->u.vlan; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - u16 vid; int err; - switch (obj->trans) { - case SWITCHDEV_TRANS_PREPARE: - if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set) + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) return -EOPNOTSUPP; /* If the requested port doesn't belong to the same bridge as @@ -263,50 +261,35 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, vlan->vid_end); if (err) return err; - break; - case SWITCHDEV_TRANS_COMMIT: - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = ds->drv->port_vlan_add(ds, p->port, vid, - vlan->flags & - BRIDGE_VLAN_INFO_UNTAGGED); - if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID) - err = ds->drv->port_pvid_set(ds, p->port, vid); - if (err) - return err; - } - break; - default: - return -EOPNOTSUPP; + + err = ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + if (err) + return err; + } else { + err = ds->drv->port_vlan_add(ds, p->port, vlan, trans); + if (err) + return err; } return 0; } static int dsa_slave_port_vlan_del(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_port_vlan *vlan) { - struct switchdev_obj_vlan *vlan = &obj->u.vlan; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - u16 vid; - int err; if (!ds->drv->port_vlan_del) return -EOPNOTSUPP; - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - err = ds->drv->port_vlan_del(ds, p->port, vid); - if (err) - return err; - } - - return 0; + return ds->drv->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, - struct switchdev_obj *obj) + struct switchdev_obj_port_vlan *vlan, + switchdev_obj_dump_cb_t *cb) { - struct switchdev_obj_vlan *vlan = &obj->u.vlan; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; DECLARE_BITMAP(members, DSA_MAX_PORTS); @@ -338,7 +321,7 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, if (test_bit(p->port, untagged)) vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; - err = obj->cb(dev, obj); + err = cb(&vlan->obj); if (err) break; } @@ -347,65 +330,48 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, } static int dsa_slave_port_fdb_add(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) { - 
struct switchdev_obj_fdb *fdb = &obj->u.fdb; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - int ret = -EOPNOTSUPP; + int ret; - if (obj->trans == SWITCHDEV_TRANS_PREPARE) - ret = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP; - else if (obj->trans == SWITCHDEV_TRANS_COMMIT) - ret = ds->drv->port_fdb_add(ds, p->port, fdb->addr, fdb->vid); + if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + return -EOPNOTSUPP; + + if (switchdev_trans_ph_prepare(trans)) + ret = ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + else + ret = ds->drv->port_fdb_add(ds, p->port, fdb, trans); return ret; } static int dsa_slave_port_fdb_del(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj_port_fdb *fdb) { - struct switchdev_obj_fdb *fdb = &obj->u.fdb; struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; if (ds->drv->port_fdb_del) - ret = ds->drv->port_fdb_del(ds, p->port, fdb->addr, fdb->vid); + ret = ds->drv->port_fdb_del(ds, p->port, fdb); return ret; } static int dsa_slave_port_fdb_dump(struct net_device *dev, - struct switchdev_obj *obj) + struct switchdev_obj_port_fdb *fdb, + switchdev_obj_dump_cb_t *cb) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - unsigned char addr[ETH_ALEN] = { 0 }; - u16 vid = 0; - int ret; - - if (!ds->drv->port_fdb_getnext) - return -EOPNOTSUPP; - - for (;;) { - bool is_static; - ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid, - &is_static); - if (ret < 0) - break; - - obj->u.fdb.addr = addr; - obj->u.fdb.vid = vid; - obj->u.fdb.ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; - - ret = obj->cb(dev, obj); - if (ret < 0) - break; - } + if (ds->drv->port_fdb_dump) + return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); - return ret == -ENOENT ? 0 : ret; + return -EOPNOTSUPP; } static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -456,15 +422,16 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state) } static int dsa_slave_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr) + const struct switchdev_attr *attr, + struct switchdev_trans *trans) { struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; int ret; switch (attr->id) { - case SWITCHDEV_ATTR_PORT_STP_STATE: - if (attr->trans == SWITCHDEV_TRANS_PREPARE) + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + if (switchdev_trans_ph_prepare(trans)) ret = ds->drv->port_stp_update ? 
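/* [Editor's sketch, not part of this patch.] port_fdb_dump pushes the
 * iteration down into the driver: instead of the old getnext loop in the
 * DSA core, the driver fills the caller's switchdev_obj_port_fdb once per
 * hardware entry and hands it to the dump callback. A hypothetical driver
 * (struct foo_fdb_ent and foo_hw_fdb_read() are made up) might do:
 */
static int foo_port_fdb_dump(struct dsa_switch *ds, int port,
			     struct switchdev_obj_port_fdb *fdb,
			     switchdev_obj_dump_cb_t *cb)
{
	struct foo_fdb_ent ent;
	int err;

	while (foo_hw_fdb_read(ds, port, &ent) == 0) {
		ether_addr_copy(fdb->addr, ent.addr);
		fdb->vid = ent.vid;
		fdb->ndm_state = ent.is_static ? NUD_NOARP : NUD_REACHABLE;
		err = cb(&fdb->obj);	/* one callback per FDB entry */
		if (err)
			return err;
	}
	return 0;
}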
0 : -EOPNOTSUPP; else ret = ds->drv->port_stp_update(ds, p->port, @@ -479,7 +446,8 @@ static int dsa_slave_port_attr_set(struct net_device *dev, } static int dsa_slave_port_obj_add(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj *obj, + struct switchdev_trans *trans) { int err; @@ -489,11 +457,15 @@ static int dsa_slave_port_obj_add(struct net_device *dev, */ switch (obj->id) { - case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_add(dev, obj); + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = dsa_slave_port_fdb_add(dev, + SWITCHDEV_OBJ_PORT_FDB(obj), + trans); break; - case SWITCHDEV_OBJ_PORT_VLAN: - err = dsa_slave_port_vlan_add(dev, obj); + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = dsa_slave_port_vlan_add(dev, + SWITCHDEV_OBJ_PORT_VLAN(obj), + trans); break; default: err = -EOPNOTSUPP; @@ -504,16 +476,18 @@ static int dsa_slave_port_obj_add(struct net_device *dev, } static int dsa_slave_port_obj_del(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj *obj) { int err; switch (obj->id) { - case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_del(dev, obj); + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = dsa_slave_port_fdb_del(dev, + SWITCHDEV_OBJ_PORT_FDB(obj)); break; - case SWITCHDEV_OBJ_PORT_VLAN: - err = dsa_slave_port_vlan_del(dev, obj); + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = dsa_slave_port_vlan_del(dev, + SWITCHDEV_OBJ_PORT_VLAN(obj)); break; default: err = -EOPNOTSUPP; @@ -524,16 +498,21 @@ static int dsa_slave_port_obj_del(struct net_device *dev, } static int dsa_slave_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj) + struct switchdev_obj *obj, + switchdev_obj_dump_cb_t *cb) { int err; switch (obj->id) { - case SWITCHDEV_OBJ_PORT_FDB: - err = dsa_slave_port_fdb_dump(dev, obj); + case SWITCHDEV_OBJ_ID_PORT_FDB: + err = dsa_slave_port_fdb_dump(dev, + SWITCHDEV_OBJ_PORT_FDB(obj), + cb); break; - case SWITCHDEV_OBJ_PORT_VLAN: - err = dsa_slave_port_vlan_dump(dev, obj); + case SWITCHDEV_OBJ_ID_PORT_VLAN: + err = dsa_slave_port_vlan_dump(dev, + SWITCHDEV_OBJ_PORT_VLAN(obj), + cb); break; default: err = -EOPNOTSUPP; @@ -587,7 +566,7 @@ static int dsa_slave_port_attr_get(struct net_device *dev, struct dsa_switch *ds = p->parent; switch (attr->id) { - case SWITCHDEV_ATTR_PORT_PARENT_ID: + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: attr->u.ppid.id_len = sizeof(ds->index); memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); break; @@ -967,6 +946,10 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_obj_dump = dsa_slave_port_obj_dump, }; +static struct device_type dsa_type = { + .name = "dsa", +}; + static void dsa_slave_adjust_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -1015,8 +998,10 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p, struct dsa_switch *ds = p->parent; p->phy = ds->slave_mii_bus->phy_map[addr]; - if (!p->phy) + if (!p->phy) { + netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; + } /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) @@ -1050,7 +1035,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, */ ret = of_phy_register_fixed_link(port_dn); if (ret) { - netdev_err(slave_dev, "failed to register fixed PHY\n"); + netdev_err(slave_dev, "failed to register fixed PHY: %d\n", ret); return ret; } phy_is_fixed = true; @@ -1061,17 +1046,20 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_flags = ds->drv->get_phy_flags(ds, p->port); if (phy_dn) { - ret = 
of_mdio_parse_addr(&slave_dev->dev, phy_dn); + int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); + /* If this PHY address is part of phys_mii_mask, which means * that we need to divert reads and writes to/from it, then we * want to bind this device using the slave MII bus created by * DSA to make that happen. */ - if (!phy_is_fixed && ret >= 0 && - (ds->phys_mii_mask & (1 << ret))) { - ret = dsa_slave_phy_connect(p, slave_dev, ret); - if (ret) + if (!phy_is_fixed && phy_id >= 0 && + (ds->phys_mii_mask & (1 << phy_id))) { + ret = dsa_slave_phy_connect(p, slave_dev, phy_id); + if (ret) { + netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret); return ret; + } } else { p->phy = of_phy_connect(slave_dev, phy_dn, dsa_slave_adjust_link, @@ -1088,8 +1076,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, */ if (!p->phy) { ret = dsa_slave_phy_connect(p, slave_dev, p->port); - if (ret) + if (ret) { + netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret); return ret; + } } else { netdev_info(slave_dev, "attached PHY at address %d [%s]\n", p->phy->addr, p->phy->drv->name); @@ -1155,6 +1145,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->priv_flags |= IFF_NO_QUEUE; slave_dev->netdev_ops = &dsa_slave_netdev_ops; slave_dev->switchdev_ops = &dsa_slave_switchdev_ops; + SET_NETDEV_DEVTYPE(slave_dev, &dsa_type); netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, NULL); @@ -1200,6 +1191,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, ret = dsa_slave_phy_setup(p, slave_dev); if (ret) { + netdev_err(master, "error %d setting up slave phy\n", ret); free_netdev(slave_dev); return ret; } @@ -1253,7 +1245,7 @@ int dsa_slave_netdevice_event(struct notifier_block *unused, goto out; err = dsa_slave_master_changed(dev); - if (err) + if (err && err != -EOPNOTSUPP) netdev_warn(dev, "failed to reflect master change\n"); break; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index d850fdc828f9..9e63f252a89e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -127,7 +127,7 @@ u32 eth_get_headlen(void *data, unsigned int len) struct flow_keys keys; /* this should never happen, but better safe than sorry */ - if (len < sizeof(*eth)) + if (unlikely(len < sizeof(*eth))) return len; /* parse any remaining L2/L3 headers, check for L4 */ diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index ea339fa94c27..b4e17a7c0df0 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -7,6 +7,15 @@ #include <net/inet_frag.h> #include <net/6lowpan.h> +typedef unsigned __bitwise__ lowpan_rx_result; +#define RX_CONTINUE ((__force lowpan_rx_result) 0u) +#define RX_DROP_UNUSABLE ((__force lowpan_rx_result) 1u) +#define RX_DROP ((__force lowpan_rx_result) 2u) +#define RX_QUEUED ((__force lowpan_rx_result) 3u) + +#define LOWPAN_DISPATCH_FRAG1 0xc0 +#define LOWPAN_DISPATCH_FRAGN 0xe0 + struct lowpan_create_arg { u16 tag; u16 d_size; @@ -40,7 +49,7 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a) /* private device info */ struct lowpan_dev_info { - struct net_device *real_dev; /* real WPAN device ptr */ + struct net_device *wdev; /* wpan device ptr */ u16 fragment_tag; }; @@ -62,4 +71,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, const void *_saddr, unsigned int len); netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev); +int lowpan_iphc_decompress(struct sk_buff 
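/* [Editor's note, illustrative only.] The __bitwise__/__force pair used for
 * lowpan_rx_result above is the usual sparse annotation trick: the typedef
 * mints a distinct integer type, so mixing the result type with plain int
 * is flagged when building with C=1, and __force silences the warning only
 * where the constants themselves are defined. The same pattern in miniature:
 */
typedef unsigned __bitwise__ foo_result;	/* distinct type for sparse */
#define FOO_OK   ((__force foo_result)0u)
#define FOO_FAIL ((__force foo_result)1u)

static inline bool foo_succeeded(foo_result res)
{
	return res == FOO_OK;	/* comparing like types: no sparse warning */
}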
*skb); +lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb); + #endif /* __IEEE802154_6LOWPAN_I_H__ */ diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index 953b1c49f5d1..20c49c724ba0 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -61,7 +61,7 @@ static struct header_ops lowpan_header_ops = { static struct lock_class_key lowpan_tx_busylock; static struct lock_class_key lowpan_netdev_xmit_lock_key; -static void lowpan_set_lockdep_class_one(struct net_device *dev, +static void lowpan_set_lockdep_class_one(struct net_device *ldev, struct netdev_queue *txq, void *_unused) { @@ -69,35 +69,47 @@ static void lowpan_set_lockdep_class_one(struct net_device *dev, &lowpan_netdev_xmit_lock_key); } -static int lowpan_dev_init(struct net_device *dev) +static int lowpan_dev_init(struct net_device *ldev) { - netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL); - dev->qdisc_tx_busylock = &lowpan_tx_busylock; + netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL); + ldev->qdisc_tx_busylock = &lowpan_tx_busylock; + return 0; +} + +static int lowpan_open(struct net_device *dev) +{ + if (!open_count) + lowpan_rx_init(); + open_count++; + return 0; +} + +static int lowpan_stop(struct net_device *dev) +{ + open_count--; + if (!open_count) + lowpan_rx_exit(); return 0; } static const struct net_device_ops lowpan_netdev_ops = { .ndo_init = lowpan_dev_init, .ndo_start_xmit = lowpan_xmit, + .ndo_open = lowpan_open, + .ndo_stop = lowpan_stop, }; -static void lowpan_setup(struct net_device *dev) +static void lowpan_setup(struct net_device *ldev) { - dev->addr_len = IEEE802154_ADDR_LEN; - memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN); - dev->type = ARPHRD_6LOWPAN; - /* Frame Control + Sequence Number + Address fields + Security Header */ - dev->hard_header_len = 2 + 1 + 20 + 14; - dev->needed_tailroom = 2; /* FCS */ - dev->mtu = IPV6_MIN_MTU; - dev->priv_flags |= IFF_NO_QUEUE; - dev->flags = IFF_BROADCAST | IFF_MULTICAST; - dev->watchdog_timeo = 0; - - dev->netdev_ops = &lowpan_netdev_ops; - dev->header_ops = &lowpan_header_ops; - dev->destructor = free_netdev; - dev->features |= NETIF_F_NETNS_LOCAL; + memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN); + /* We need an ipv6hdr as minimum len when calling xmit */ + ldev->hard_header_len = sizeof(struct ipv6hdr); + ldev->flags = IFF_BROADCAST | IFF_MULTICAST; + + ldev->netdev_ops = &lowpan_netdev_ops; + ldev->header_ops = &lowpan_header_ops; + ldev->destructor = free_netdev; + ldev->features |= NETIF_F_NETNS_LOCAL; } static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -109,10 +121,10 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -static int lowpan_newlink(struct net *src_net, struct net_device *dev, +static int lowpan_newlink(struct net *src_net, struct net_device *ldev, struct nlattr *tb[], struct nlattr *data[]) { - struct net_device *real_dev; + struct net_device *wdev; int ret; ASSERT_RTNL(); @@ -120,58 +132,56 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev, pr_debug("adding new link\n"); if (!tb[IFLA_LINK] || - !net_eq(dev_net(dev), &init_net)) + !net_eq(dev_net(ldev), &init_net)) return -EINVAL; - /* find and hold real wpan device */ - real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK])); - if (!real_dev) + /* find and hold wpan device */ + wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK])); + if (!wdev) return -ENODEV; - if (real_dev->type != 
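/* [Editor's sketch, not part of this patch.] The rx init/exit bookkeeping
 * moves from newlink/dellink into ndo_open/ndo_stop: the shared receive
 * handler is registered when the first lowpan interface comes up and torn
 * down when the last one goes down. The generic refcount-on-open shape
 * (names illustrative; a plain counter is safe here because ndo_open and
 * ndo_stop are serialized under RTNL):
 */
static int foo_open_count;

static int foo_open(struct net_device *dev)
{
	if (!foo_open_count++)
		foo_global_rx_init();	/* first opener registers */
	return 0;
}

static int foo_stop(struct net_device *dev)
{
	if (!--foo_open_count)
		foo_global_rx_exit();	/* last closer unregisters */
	return 0;
}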
ARPHRD_IEEE802154) { - dev_put(real_dev); + if (wdev->type != ARPHRD_IEEE802154) { + dev_put(wdev); return -EINVAL; } - if (real_dev->ieee802154_ptr->lowpan_dev) { - dev_put(real_dev); + if (wdev->ieee802154_ptr->lowpan_dev) { + dev_put(wdev); return -EBUSY; } - lowpan_dev_info(dev)->real_dev = real_dev; + lowpan_dev_info(ldev)->wdev = wdev; /* Set the lowpan hardware address to the wpan hardware address. */ - memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN); - - lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154); - - ret = register_netdevice(dev); + memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN); + /* We need headroom for possible wpan_dev_hard_header call and tailroom + * for encryption/fcs handling. The lowpan interface will replace + * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN + * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6 + * header. + */ + ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN + + wdev->needed_headroom; + ldev->needed_tailroom = wdev->needed_tailroom; + + lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154); + + ret = register_netdevice(ldev); if (ret < 0) { - dev_put(real_dev); + dev_put(wdev); return ret; } - real_dev->ieee802154_ptr->lowpan_dev = dev; - if (!open_count) - lowpan_rx_init(); - - open_count++; - + wdev->ieee802154_ptr->lowpan_dev = ldev; return 0; } -static void lowpan_dellink(struct net_device *dev, struct list_head *head) +static void lowpan_dellink(struct net_device *ldev, struct list_head *head) { - struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev); - struct net_device *real_dev = lowpan_dev->real_dev; + struct net_device *wdev = lowpan_dev_info(ldev)->wdev; ASSERT_RTNL(); - open_count--; - - if (!open_count) - lowpan_rx_exit(); - - real_dev->ieee802154_ptr->lowpan_dev = NULL; - unregister_netdevice(dev); - dev_put(real_dev); + wdev->ieee802154_ptr->lowpan_dev = NULL; + unregister_netdevice(ldev); + dev_put(wdev); } static struct rtnl_link_ops lowpan_link_ops __read_mostly = { @@ -196,9 +206,9 @@ static inline void lowpan_netlink_fini(void) static int lowpan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net_device *wdev = netdev_notifier_info_to_dev(ptr); - if (dev->type != ARPHRD_IEEE802154) + if (wdev->type != ARPHRD_IEEE802154) goto out; switch (event) { @@ -207,8 +217,8 @@ static int lowpan_device_event(struct notifier_block *unused, * also delete possible lowpan interfaces which belongs * to the wpan interface. 
*/ - if (dev->ieee802154_ptr && dev->ieee802154_ptr->lowpan_dev) - lowpan_dellink(dev->ieee802154_ptr->lowpan_dev, NULL); + if (wdev->ieee802154_ptr->lowpan_dev) + lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL); break; default: break; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 214d44aef35b..6b437e8760d3 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -32,21 +32,10 @@ static const char lowpan_frags_cache_name[] = "lowpan-frags"; -struct lowpan_frag_info { - u16 d_tag; - u16 d_size; - u8 d_offset; -}; - -static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb) -{ - return (struct lowpan_frag_info *)skb->cb; -} - static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, - struct sk_buff *prev, struct net_device *dev); + struct sk_buff *prev, struct net_device *ldev); static unsigned int lowpan_hash_frag(u16 tag, u16 d_size, const struct ieee802154_addr *saddr, @@ -111,7 +100,7 @@ out: } static inline struct lowpan_frag_queue * -fq_find(struct net *net, const struct lowpan_frag_info *frag_info, +fq_find(struct net *net, const struct lowpan_802154_cb *cb, const struct ieee802154_addr *src, const struct ieee802154_addr *dst) { @@ -121,12 +110,12 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - arg.tag = frag_info->d_tag; - arg.d_size = frag_info->d_size; + arg.tag = cb->d_tag; + arg.d_size = cb->d_size; arg.src = src; arg.dst = dst; - hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst); + hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst); q = inet_frag_find(&ieee802154_lowpan->frags, &lowpan_frags, &arg, hash); @@ -138,17 +127,17 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info, } static int lowpan_frag_queue(struct lowpan_frag_queue *fq, - struct sk_buff *skb, const u8 frag_type) + struct sk_buff *skb, u8 frag_type) { struct sk_buff *prev, *next; - struct net_device *dev; + struct net_device *ldev; int end, offset; if (fq->q.flags & INET_FRAG_COMPLETE) goto err; - offset = lowpan_cb(skb)->d_offset << 3; - end = lowpan_cb(skb)->d_size; + offset = lowpan_802154_cb(skb)->d_offset << 3; + end = lowpan_802154_cb(skb)->d_size; /* Is this the final fragment? */ if (offset + skb->len == end) { @@ -174,13 +163,16 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, * this fragment, right? */ prev = fq->q.fragments_tail; - if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) { + if (!prev || + lowpan_802154_cb(prev)->d_offset < + lowpan_802154_cb(skb)->d_offset) { next = NULL; goto found; } prev = NULL; for (next = fq->q.fragments; next != NULL; next = next->next) { - if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset) + if (lowpan_802154_cb(next)->d_offset >= + lowpan_802154_cb(skb)->d_offset) break; /* bingo! */ prev = next; } @@ -195,18 +187,15 @@ found: else fq->q.fragments = skb; - dev = skb->dev; - if (dev) + ldev = skb->dev; + if (ldev) skb->dev = NULL; fq->q.stamp = skb->tstamp; - if (frag_type == LOWPAN_DISPATCH_FRAG1) { - /* Calculate uncomp. 
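/* [Editor's note, illustrative only.] lowpan_802154_cb() -- which replaces
 * the open-coded lowpan_cb() above -- follows the standard skb control-block
 * idiom: per-layer scratch data is a struct overlaid on skb->cb, valid only
 * while this layer owns the skb and only if it fits the 48-byte cb[] array.
 * Minimal form of the idiom, with fields mirroring how this patch uses the
 * cb (the real struct lives in a header not shown in this diff):
 */
struct lowpan_802154_cb_sketch {
	u16 d_tag;
	unsigned int d_size;
	u8 d_offset;
};

static inline struct lowpan_802154_cb_sketch *cb_of(struct sk_buff *skb)
{
	/* refuse to build if the scratch struct outgrows skb->cb */
	BUILD_BUG_ON(sizeof(struct lowpan_802154_cb_sketch) > sizeof(skb->cb));
	return (void *)skb->cb;
}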
6lowpan header to estimate full size */ - fq->q.meat += lowpan_uncompress_size(skb, NULL); + if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; - } else { - fq->q.meat += skb->len; - } + + fq->q.meat += skb->len; add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && @@ -215,7 +204,7 @@ found: unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; - res = lowpan_frag_reasm(fq, prev, dev); + res = lowpan_frag_reasm(fq, prev, ldev); skb->_skb_refdst = orefdst; return res; } @@ -235,7 +224,7 @@ err: * the last and the first frames arrived and all the bits are here. */ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, - struct net_device *dev) + struct net_device *ldev) { struct sk_buff *fp, *head = fq->q.fragments; int sum_truesize; @@ -313,7 +302,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; - head->dev = dev; + head->dev = ldev; head->tstamp = fq->q.stamp; fq->q.fragments = NULL; @@ -325,24 +314,87 @@ out_oom: return -1; } -static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type, - struct lowpan_frag_info *frag_info) +static int lowpan_frag_rx_handlers_result(struct sk_buff *skb, + lowpan_rx_result res) +{ + switch (res) { + case RX_QUEUED: + return NET_RX_SUCCESS; + case RX_CONTINUE: + /* nobody cared about this packet */ + net_warn_ratelimited("%s: received unknown dispatch\n", + __func__); + + /* fall-through */ + default: + /* all others failure */ + return NET_RX_DROP; + } +} + +static lowpan_rx_result lowpan_frag_rx_h_iphc(struct sk_buff *skb) +{ + int ret; + + if (!lowpan_is_iphc(*skb_network_header(skb))) + return RX_CONTINUE; + + ret = lowpan_iphc_decompress(skb); + if (ret < 0) + return RX_DROP; + + return RX_QUEUED; +} + +static int lowpan_invoke_frag_rx_handlers(struct sk_buff *skb) +{ + lowpan_rx_result res; + +#define CALL_RXH(rxh) \ + do { \ + res = rxh(skb); \ + if (res != RX_CONTINUE) \ + goto rxh_next; \ + } while (0) + + /* likely at first */ + CALL_RXH(lowpan_frag_rx_h_iphc); + CALL_RXH(lowpan_rx_h_ipv6); + +rxh_next: + return lowpan_frag_rx_handlers_result(skb, res); +#undef CALL_RXH +} + +#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK 0x07 +#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT 8 + +static int lowpan_get_cb(struct sk_buff *skb, u8 frag_type, + struct lowpan_802154_cb *cb) { bool fail; - u8 pattern = 0, low = 0; + u8 high = 0, low = 0; __be16 d_tag = 0; - fail = lowpan_fetch_skb(skb, &pattern, 1); + fail = lowpan_fetch_skb(skb, &high, 1); fail |= lowpan_fetch_skb(skb, &low, 1); - frag_info->d_size = (pattern & 7) << 8 | low; + /* remove the dispatch value and use first three bits as high value + * for the datagram size + */ + cb->d_size = (high & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) << + LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | low; fail |= lowpan_fetch_skb(skb, &d_tag, 2); - frag_info->d_tag = ntohs(d_tag); + cb->d_tag = ntohs(d_tag); if (frag_type == LOWPAN_DISPATCH_FRAGN) { - fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1); + fail |= lowpan_fetch_skb(skb, &cb->d_offset, 1); } else { skb_reset_network_header(skb); - frag_info->d_offset = 0; + cb->d_offset = 0; + /* check if datagram_size has ipv6hdr on FRAG1 */ + fail |= cb->d_size < sizeof(struct ipv6hdr); + /* check if we can dereference the dispatch value */ + fail |= !skb->len; } if (unlikely(fail)) @@ -351,27 +403,33 @@ static int lowpan_get_frag_info(struct sk_buff *skb, const u8 
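/* [Editor's worked example, not part of this patch.] The fragment header
 * packs an 11-bit datagram size: the low 3 bits of the first byte are the
 * high bits, the second byte the low bits, so sizes up to 2047 octets can
 * be described. Standalone parse of the first two header bytes:
 */
#include <stdint.h>

static uint16_t frag_dgram_size(uint8_t b0, uint8_t b1)
{
	/* e.g. b0 = 0xC5 (FRAG1 dispatch + high bits 5), b1 = 0x28 -> 1320 */
	return ((uint16_t)(b0 & 0x07) << 8) | b1;
}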
frag_type, return 0; } -int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type) +int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) { struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); - struct lowpan_frag_info *frag_info = lowpan_cb(skb); - struct ieee802154_addr source, dest; + struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); + struct ieee802154_hdr hdr; int err; - source = mac_cb(skb)->source; - dest = mac_cb(skb)->dest; + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + goto err; - err = lowpan_get_frag_info(skb, frag_type, frag_info); + err = lowpan_get_cb(skb, frag_type, cb); if (err < 0) goto err; - if (frag_info->d_size > IPV6_MIN_MTU) { + if (frag_type == LOWPAN_DISPATCH_FRAG1) { + err = lowpan_invoke_frag_rx_handlers(skb); + if (err == NET_RX_DROP) + goto err; + } + + if (cb->d_size > IPV6_MIN_MTU) { net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); goto err; } - fq = fq_find(net, frag_info, &source, &dest); + fq = fq_find(net, cb, &hdr.source, &hdr.dest); if (fq != NULL) { int ret; @@ -387,7 +445,6 @@ err: kfree_skb(skb); return -1; } -EXPORT_SYMBOL(lowpan_frag_rcv); #ifdef CONFIG_SYSCTL static int zero; @@ -523,14 +580,19 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); + int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&ieee802154_lowpan->frags); - - return lowpan_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&ieee802154_lowpan->frags); + if (res) + return res; + res = lowpan_frags_ns_sysctl_register(net); + if (res) + inet_frags_uninit_net(&ieee802154_lowpan->frags); + return res; } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c index 12e10201d263..ef185dd4110d 100644 --- a/net/ieee802154/6lowpan/rx.c +++ b/net/ieee802154/6lowpan/rx.c @@ -11,126 +11,307 @@ #include <linux/if_arp.h> #include <net/6lowpan.h> +#include <net/mac802154.h> #include <net/ieee802154_netdev.h> #include "6lowpan_i.h" -static int lowpan_give_skb_to_device(struct sk_buff *skb, - struct net_device *dev) +#define LOWPAN_DISPATCH_FIRST 0xc0 +#define LOWPAN_DISPATCH_FRAG_MASK 0xf8 + +#define LOWPAN_DISPATCH_NALP 0x00 +#define LOWPAN_DISPATCH_ESC 0x40 +#define LOWPAN_DISPATCH_HC1 0x42 +#define LOWPAN_DISPATCH_DFF 0x43 +#define LOWPAN_DISPATCH_BC0 0x50 +#define LOWPAN_DISPATCH_MESH 0x80 + +static int lowpan_give_skb_to_device(struct sk_buff *skb) { - skb->dev = dev->ieee802154_ptr->lowpan_dev; skb->protocol = htons(ETH_P_IPV6); - skb->pkt_type = PACKET_HOST; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; return netif_rx(skb); } -static int -iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +static int lowpan_rx_handlers_result(struct sk_buff *skb, lowpan_rx_result res) { - u8 iphc0, iphc1; - struct ieee802154_addr_sa sa, da; - void *sap, *dap; + switch (res) { + case RX_CONTINUE: + /* nobody cared about this packet */ + net_warn_ratelimited("%s: received unknown dispatch\n", + __func__); - raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len); - /* at least two bytes will be used for the encoding */ - if (skb->len < 2) - return -EINVAL; + /* fall-through */ + case RX_DROP_UNUSABLE: + kfree_skb(skb); - if (lowpan_fetch_skb_u8(skb, &iphc0)) - return -EINVAL; + 
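/* [Editor's note, illustrative only.] lowpan_frags_init_net() now unwinds
 * on failure: if sysctl registration fails after the fragment state was
 * initialised, that state is torn down again so the netns init fails
 * cleanly rather than leaking. The generic two-step init shape (step_a/
 * step_b are made-up names):
 */
static int foo_two_step_init(void)
{
	int err;

	err = step_a_init();
	if (err)
		return err;

	err = step_b_init();
	if (err)
		step_a_exit();	/* undo step A when a later step fails */
	return err;
}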
/* fall-through */ + case RX_DROP: + return NET_RX_DROP; + case RX_QUEUED: + return lowpan_give_skb_to_device(skb); + default: + break; + } - if (lowpan_fetch_skb_u8(skb, &iphc1)) - return -EINVAL; + return NET_RX_DROP; +} - ieee802154_addr_to_sa(&sa, &hdr->source); - ieee802154_addr_to_sa(&da, &hdr->dest); +static inline bool lowpan_is_frag1(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAG1; +} - if (sa.addr_type == IEEE802154_ADDR_SHORT) - sap = &sa.short_addr; - else - sap = &sa.hwaddr; +static inline bool lowpan_is_fragn(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAGN; +} - if (da.addr_type == IEEE802154_ADDR_SHORT) - dap = &da.short_addr; - else - dap = &da.hwaddr; +static lowpan_rx_result lowpan_rx_h_frag(struct sk_buff *skb) +{ + int ret; - return lowpan_header_decompress(skb, skb->dev, sap, sa.addr_type, - IEEE802154_ADDR_LEN, dap, da.addr_type, - IEEE802154_ADDR_LEN, iphc0, iphc1); + if (!(lowpan_is_frag1(*skb_network_header(skb)) || + lowpan_is_fragn(*skb_network_header(skb)))) + return RX_CONTINUE; + + ret = lowpan_frag_rcv(skb, *skb_network_header(skb) & + LOWPAN_DISPATCH_FRAG_MASK); + if (ret == 1) + return RX_QUEUED; + + /* Packet is freed by lowpan_frag_rcv on error or put into the frag + * bucket. + */ + return RX_DROP; } -static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) +int lowpan_iphc_decompress(struct sk_buff *skb) { struct ieee802154_hdr hdr; + + if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) + return -EINVAL; + + return lowpan_header_decompress(skb, skb->dev, &hdr.dest, &hdr.source); +} + +static lowpan_rx_result lowpan_rx_h_iphc(struct sk_buff *skb) +{ int ret; - if (dev->type != ARPHRD_IEEE802154 || - !dev->ieee802154_ptr->lowpan_dev) - goto drop; + if (!lowpan_is_iphc(*skb_network_header(skb))) + return RX_CONTINUE; - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) + /* Setting datagram_offset to zero indicates non frag handling + * while doing lowpan_header_decompress. + */ + lowpan_802154_cb(skb)->d_size = 0; + + ret = lowpan_iphc_decompress(skb); + if (ret < 0) + return RX_DROP_UNUSABLE; + + return RX_QUEUED; +} + +lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb) +{ + if (!lowpan_is_ipv6(*skb_network_header(skb))) + return RX_CONTINUE; + + /* Pull off the 1-byte of 6lowpan header. 
*/ + skb_pull(skb, 1); + return RX_QUEUED; +} + +static inline bool lowpan_is_esc(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_ESC; +} + +static lowpan_rx_result lowpan_rx_h_esc(struct sk_buff *skb) +{ + if (!lowpan_is_esc(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN ESC not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_hc1(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_HC1; +} + +static lowpan_rx_result lowpan_rx_h_hc1(struct sk_buff *skb) +{ + if (!lowpan_is_hc1(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN HC1 not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_dff(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_DFF; +} + +static lowpan_rx_result lowpan_rx_h_dff(struct sk_buff *skb) +{ + if (!lowpan_is_dff(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN DFF not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_bc0(u8 dispatch) +{ + return dispatch == LOWPAN_DISPATCH_BC0; +} + +static lowpan_rx_result lowpan_rx_h_bc0(struct sk_buff *skb) +{ + if (!lowpan_is_bc0(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN BC0 not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static inline bool lowpan_is_mesh(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_MESH; +} + +static lowpan_rx_result lowpan_rx_h_mesh(struct sk_buff *skb) +{ + if (!lowpan_is_mesh(*skb_network_header(skb))) + return RX_CONTINUE; + + net_warn_ratelimited("%s: %s\n", skb->dev->name, + "6LoWPAN MESH not supported\n"); + + return RX_DROP_UNUSABLE; +} + +static int lowpan_invoke_rx_handlers(struct sk_buff *skb) +{ + lowpan_rx_result res; + +#define CALL_RXH(rxh) \ + do { \ + res = rxh(skb); \ + if (res != RX_CONTINUE) \ + goto rxh_next; \ + } while (0) + + /* likely at first */ + CALL_RXH(lowpan_rx_h_iphc); + CALL_RXH(lowpan_rx_h_frag); + CALL_RXH(lowpan_rx_h_ipv6); + CALL_RXH(lowpan_rx_h_esc); + CALL_RXH(lowpan_rx_h_hc1); + CALL_RXH(lowpan_rx_h_dff); + CALL_RXH(lowpan_rx_h_bc0); + CALL_RXH(lowpan_rx_h_mesh); + +rxh_next: + return lowpan_rx_handlers_result(skb, res); +#undef CALL_RXH +} + +static inline bool lowpan_is_nalp(u8 dispatch) +{ + return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_NALP; +} + +/* Lookup for reserved dispatch values at: + * https://www.iana.org/assignments/_6lowpan-parameters/_6lowpan-parameters.xhtml#_6lowpan-parameters-1 + * + * Last Updated: 2015-01-22 + */ +static inline bool lowpan_is_reserved(u8 dispatch) +{ + return ((dispatch >= 0x44 && dispatch <= 0x4F) || + (dispatch >= 0x51 && dispatch <= 0x5F) || + (dispatch >= 0xc8 && dispatch <= 0xdf) || + (dispatch >= 0xe8 && dispatch <= 0xff)); +} + +/* lowpan_rx_h_check checks on generic 6LoWPAN requirements + * in MAC and 6LoWPAN header. + * + * Don't manipulate the skb here, it could be shared buffer. 
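/* [Editor's sketch, not part of this patch.] The CALL_RXH chain above is a
 * chain-of-responsibility: each handler first tests the dispatch byte and
 * returns RX_CONTINUE if the packet is not its own, so the chain walks on
 * until some handler queues or drops the skb. Stripped to the bare pattern
 * (plain C; struct pkt and the handlers are hypothetical):
 */
struct pkt;
enum res { CONTINUE, QUEUED, DROPPED };

static enum res run_handlers(struct pkt *p,
			     enum res (*const handlers[])(struct pkt *),
			     unsigned int n)
{
	enum res r = CONTINUE;
	unsigned int i;

	/* handlers are ordered most-likely first, as in the code above */
	for (i = 0; i < n && r == CONTINUE; i++)
		r = handlers[i](p);
	return r;
}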
+ */ +static inline bool lowpan_rx_h_check(struct sk_buff *skb) +{ + __le16 fc = ieee802154_get_fc_from_skb(skb); + + /* check on ieee802154 conform 6LoWPAN header */ + if (!ieee802154_is_data(fc) || + !ieee802154_is_intra_pan(fc)) + return false; + + /* check if we can dereference the dispatch */ + if (unlikely(!skb->len)) + return false; + + if (lowpan_is_nalp(*skb_network_header(skb)) || + lowpan_is_reserved(*skb_network_header(skb))) + return false; + + return true; +} + +static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev, + struct packet_type *pt, struct net_device *orig_wdev) +{ + struct net_device *ldev; + + if (wdev->type != ARPHRD_IEEE802154 || + skb->pkt_type == PACKET_OTHERHOST || + !lowpan_rx_h_check(skb)) goto drop; - if (!netif_running(dev)) - goto drop_skb; + ldev = wdev->ieee802154_ptr->lowpan_dev; + if (!ldev || !netif_running(ldev)) + goto drop; - if (skb->pkt_type == PACKET_OTHERHOST) - goto drop_skb; + /* Replacing skb->dev and the rx handlers that follow will manipulate the skb. */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + skb->dev = ldev; - if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) - goto drop_skb; - - /* check that it's our buffer */ - if (skb->data[0] == LOWPAN_DISPATCH_IPV6) { - /* Pull off the 1-byte of 6lowpan header. */ - skb_pull(skb, 1); - return lowpan_give_skb_to_device(skb, dev); - } else { - switch (skb->data[0] & 0xe0) { - case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */ - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_device(skb, dev); - case LOWPAN_DISPATCH_FRAG1: /* first fragment header */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_device(skb, dev); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */ - ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN); - if (ret == 1) { - ret = iphc_decompress(skb, &hdr); - if (ret < 0) - goto drop_skb; - - return lowpan_give_skb_to_device(skb, dev); - } else if (ret == -1) { - return NET_RX_DROP; - } else { - return NET_RX_SUCCESS; - } - default: - break; - } + /* When receiving frag1 it's likely that we will manipulate the buffer. + * When receiving iphc we manipulate the data buffer. So we need + * to unshare the buffer. + */ + if (lowpan_is_frag1(*skb_network_header(skb)) || + lowpan_is_iphc(*skb_network_header(skb))) { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + } -drop_skb: - kfree_skb(skb); + return lowpan_invoke_rx_handlers(skb); + drop: + kfree_skb(skb); +out: + return NET_RX_DROP; } diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c index f6263fc12340..d4353faced35 100644 --- a/net/ieee802154/6lowpan/tx.c +++ b/net/ieee802154/6lowpan/tx.c @@ -10,9 +10,13 @@ #include <net/6lowpan.h> #include <net/ieee802154_netdev.h> +#include <net/mac802154.h> #include "6lowpan_i.h" +#define LOWPAN_FRAG1_HEAD_SIZE 0x4 +#define LOWPAN_FRAGN_HEAD_SIZE 0x5 + /* don't save pan id, it's intra pan */ struct lowpan_addr { u8 mode; @@ -36,7 +40,14 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb) sizeof(struct lowpan_addr_info)); } -int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, +/* This callback will be called from AF_PACKET and IPv6 stack, the AF_PACKET + * sockets give an 8 byte array for addresses only!
+ * + * TODO I think AF_PACKET DGRAM (sending/receiving) RAW (sending) makes no + * sense here. We should disable it, the right use-case would be AF_INET6 + * RAW/DGRAM sockets. + */ +int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev, unsigned short type, const void *_daddr, const void *_saddr, unsigned int len) { @@ -51,7 +62,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, return 0; if (!saddr) - saddr = dev->dev_addr; + saddr = ldev->dev_addr; raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8); raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8); @@ -71,28 +82,33 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev, static struct sk_buff* lowpan_alloc_frag(struct sk_buff *skb, int size, - const struct ieee802154_hdr *master_hdr) + const struct ieee802154_hdr *master_hdr, bool frag1) { - struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev; + struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev; struct sk_buff *frag; int rc; - frag = alloc_skb(real_dev->hard_header_len + - real_dev->needed_tailroom + size, + frag = alloc_skb(wdev->needed_headroom + wdev->needed_tailroom + size, GFP_ATOMIC); if (likely(frag)) { - frag->dev = real_dev; + frag->dev = wdev; frag->priority = skb->priority; - skb_reserve(frag, real_dev->hard_header_len); + skb_reserve(frag, wdev->needed_headroom); skb_reset_network_header(frag); *mac_cb(frag) = *mac_cb(skb); - rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest, - &master_hdr->source, size); - if (rc < 0) { - kfree_skb(frag); - return ERR_PTR(rc); + if (frag1) { + memcpy(skb_put(frag, skb->mac_len), + skb_mac_header(skb), skb->mac_len); + } else { + rc = wpan_dev_hard_header(frag, wdev, + &master_hdr->dest, + &master_hdr->source, size); + if (rc < 0) { + kfree_skb(frag); + return ERR_PTR(rc); + } } } else { frag = ERR_PTR(-ENOMEM); @@ -104,13 +120,13 @@ lowpan_alloc_frag(struct sk_buff *skb, int size, static int lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, u8 *frag_hdr, int frag_hdrlen, - int offset, int len) + int offset, int len, bool frag1) { struct sk_buff *frag; raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen); - frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr); + frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr, frag1); if (IS_ERR(frag)) return PTR_ERR(frag); @@ -123,19 +139,17 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr, } static int -lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, - const struct ieee802154_hdr *wpan_hdr) +lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev, + const struct ieee802154_hdr *wpan_hdr, u16 dgram_size, + u16 dgram_offset) { - u16 dgram_size, dgram_offset; __be16 frag_tag; u8 frag_hdr[5]; int frag_cap, frag_len, payload_cap, rc; int skb_unprocessed, skb_offset; - dgram_size = lowpan_uncompress_size(skb, &dgram_offset) - - skb->mac_len; - frag_tag = htons(lowpan_dev_info(dev)->fragment_tag); - lowpan_dev_info(dev)->fragment_tag++; + frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag); + lowpan_dev_info(ldev)->fragment_tag++; frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07); frag_hdr[1] = dgram_size & 0xff; @@ -151,7 +165,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, LOWPAN_FRAG1_HEAD_SIZE, 0, - frag_len + skb_network_header_len(skb)); + frag_len + skb_network_header_len(skb), + 
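/* [Editor's worked example, not part of this patch.] The fragmentation path
 * above builds a 4-byte FRAG1 header (dispatch+size, size low, tag, tag) and
 * a 5-byte FRAGN header that adds the offset in 8-octet units; FRAG1 reuses
 * the already-built MAC header via memcpy, FRAGN rebuilds one. Standalone
 * construction of the FRAGN header:
 */
#include <stdint.h>

static void build_fragn_hdr(uint8_t out[5], uint16_t dgram_size,
			    uint16_t tag, uint16_t offset)
{
	out[0] = 0xE0 | ((dgram_size >> 8) & 0x07);	/* FRAGN dispatch + size high */
	out[1] = dgram_size & 0xff;			/* size low byte */
	out[2] = tag >> 8;				/* tag, sent big-endian */
	out[3] = tag & 0xff;
	out[4] = offset >> 3;				/* offset in 8-octet units */
}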
true); if (rc) { pr_debug("%s unable to send FRAG1 packet (tag: %d)", __func__, ntohs(frag_tag)); @@ -172,7 +187,7 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr, LOWPAN_FRAGN_HEAD_SIZE, skb_offset, - frag_len); + frag_len, false); if (rc) { pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n", __func__, ntohs(frag_tag), skb_offset); @@ -180,6 +195,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev, } } while (skb_unprocessed > frag_cap); + ldev->stats.tx_packets++; + ldev->stats.tx_bytes += dgram_size; consume_skb(skb); return NET_XMIT_SUCCESS; @@ -188,9 +205,10 @@ err: return rc; } -static int lowpan_header(struct sk_buff *skb, struct net_device *dev) +static int lowpan_header(struct sk_buff *skb, struct net_device *ldev, + u16 *dgram_size, u16 *dgram_offset) { - struct wpan_dev *wpan_dev = lowpan_dev_info(dev)->real_dev->ieee802154_ptr; + struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr; struct ieee802154_addr sa, da; struct ieee802154_mac_cb *cb = mac_cb_init(skb); struct lowpan_addr_info info; @@ -202,7 +220,10 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) daddr = &info.daddr.u.extended_addr; saddr = &info.saddr.u.extended_addr; - lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len); + *dgram_size = skb->len; + lowpan_header_compress(skb, ldev, daddr, saddr); + /* dgram_offset = (saved bytes after compression) + lowpan header len */ + *dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb); cb->type = IEEE802154_FC_TYPE_DATA; @@ -217,7 +238,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) /* if the destination address is the broadcast address, use the * corresponding short address */ - if (lowpan_is_addr_broadcast((const u8 *)daddr)) { + if (!memcmp(daddr, ldev->broadcast, EUI64_ADDR_LEN)) { da.mode = IEEE802154_ADDR_SHORT; da.short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); cb->ackreq = false; @@ -227,17 +248,20 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev) cb->ackreq = wpan_dev->ackreq; } - return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev, - ETH_P_IPV6, (void *)&da, (void *)&sa, 0); + return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa, + 0); } -netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev) { struct ieee802154_hdr wpan_hdr; int max_single, ret; + u16 dgram_size, dgram_offset; pr_debug("package xmit\n"); + WARN_ON_ONCE(skb->len > IPV6_MIN_MTU); + /* We must take a copy of the skb before we modify/replace the ipv6 * header as the header could be used elsewhere */ @@ -245,7 +269,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb) return NET_XMIT_DROP; - ret = lowpan_header(skb, dev); + ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset); if (ret < 0) { kfree_skb(skb); return NET_XMIT_DROP; @@ -259,13 +283,16 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev) max_single = ieee802154_max_payload(&wpan_hdr); if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) { - skb->dev = lowpan_dev_info(dev)->real_dev; + skb->dev = lowpan_dev_info(ldev)->wdev; + ldev->stats.tx_packets++; + ldev->stats.tx_bytes += dgram_size; return dev_queue_xmit(skb); } else { netdev_tx_t rc; pr_debug("frame is too big, fragmentation is needed\n"); - rc = 
lowpan_xmit_fragmented(skb, dev, &wpan_hdr); + rc = lowpan_xmit_fragmented(skb, ldev, &wpan_hdr, dgram_size, + dgram_offset); return rc < 0 ? NET_XMIT_DROP : rc; } diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig index 1370d5b0041b..188135bcb803 100644 --- a/net/ieee802154/Kconfig +++ b/net/ieee802154/Kconfig @@ -12,6 +12,11 @@ menuconfig IEEE802154 if IEEE802154 +config IEEE802154_NL802154_EXPERIMENTAL + bool "IEEE 802.15.4 experimental netlink support" + ---help--- + Adds experimental netlink support for nl802154. + config IEEE802154_SOCKET tristate "IEEE 802.15.4 socket interface" default y diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c index b0248e934230..c35fdfa6d04e 100644 --- a/net/ieee802154/core.c +++ b/net/ieee802154/core.c @@ -95,6 +95,18 @@ cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx) return result; } +struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx) +{ + struct cfg802154_registered_device *rdev; + + ASSERT_RTNL(); + + rdev = cfg802154_rdev_by_wpan_phy_idx(wpan_phy_idx); + if (!rdev) + return NULL; + return &rdev->wpan_phy; +} + struct wpan_phy * wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size) { diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h index f3e95580caee..231fade959f3 100644 --- a/net/ieee802154/core.h +++ b/net/ieee802154/core.h @@ -42,5 +42,6 @@ extern int cfg802154_rdev_list_generation; void cfg802154_dev_free(struct cfg802154_registered_device *rdev); struct cfg802154_registered_device * cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx); +struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx); #endif /* __IEEE802154_CORE_H */ diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c index a051b6993177..c7439f0fbbdf 100644 --- a/net/ieee802154/header_ops.c +++ b/net/ieee802154/header_ops.c @@ -83,35 +83,35 @@ ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr) } int -ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr) +ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr) { - u8 buf[MAC802154_FRAME_HARD_HEADER_LEN]; + u8 buf[IEEE802154_MAX_HEADER_LEN]; int pos = 2; int rc; - struct ieee802154_hdr_fc fc = hdr->fc; + struct ieee802154_hdr_fc *fc = &hdr->fc; buf[pos++] = hdr->seq; - fc.dest_addr_mode = hdr->dest.mode; + fc->dest_addr_mode = hdr->dest.mode; rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false); if (rc < 0) return -EINVAL; pos += rc; - fc.source_addr_mode = hdr->source.mode; + fc->source_addr_mode = hdr->source.mode; if (hdr->source.pan_id == hdr->dest.pan_id && hdr->dest.mode != IEEE802154_ADDR_NONE) - fc.intra_pan = true; + fc->intra_pan = true; - rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc.intra_pan); + rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc->intra_pan); if (rc < 0) return -EINVAL; pos += rc; - if (fc.security_enabled) { - fc.version = 1; + if (fc->security_enabled) { + fc->version = 1; rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec); if (rc < 0) @@ -120,7 +120,7 @@ ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr) pos += rc; } - memcpy(buf, &fc, 2); + memcpy(buf, fc, 2); memcpy(skb_push(skb, pos), buf, pos); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 3f89c0abdab1..16ef0d9f566e 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -232,8 +232,86 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED }, 
[NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 }, + +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, + [NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, }, + [NL802154_ATTR_SEC_OUT_KEY_ID] = { .type = NLA_NESTED, }, + [NL802154_ATTR_SEC_FRAME_COUNTER] = { .type = NLA_U32 }, + + [NL802154_ATTR_SEC_LEVEL] = { .type = NLA_NESTED }, + [NL802154_ATTR_SEC_DEVICE] = { .type = NLA_NESTED }, + [NL802154_ATTR_SEC_DEVKEY] = { .type = NLA_NESTED }, + [NL802154_ATTR_SEC_KEY] = { .type = NLA_NESTED }, +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL +static int +nl802154_prepare_wpan_dev_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct cfg802154_registered_device **rdev, + struct wpan_dev **wpan_dev) +{ + int err; + + rtnl_lock(); + + if (!cb->args[0]) { + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize, + nl802154_fam.attrbuf, nl802154_fam.maxattr, + nl802154_policy); + if (err) + goto out_unlock; + + *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk), + nl802154_fam.attrbuf); + if (IS_ERR(*wpan_dev)) { + err = PTR_ERR(*wpan_dev); + goto out_unlock; + } + *rdev = wpan_phy_to_rdev((*wpan_dev)->wpan_phy); + /* 0 is the first index - add 1 to parse only once */ + cb->args[0] = (*rdev)->wpan_phy_idx + 1; + cb->args[1] = (*wpan_dev)->identifier; + } else { + /* subtract the 1 again here */ + struct wpan_phy *wpan_phy = wpan_phy_idx_to_wpan_phy(cb->args[0] - 1); + struct wpan_dev *tmp; + + if (!wpan_phy) { + err = -ENODEV; + goto out_unlock; + } + *rdev = wpan_phy_to_rdev(wpan_phy); + *wpan_dev = NULL; + + list_for_each_entry(tmp, &(*rdev)->wpan_dev_list, list) { + if (tmp->identifier == cb->args[1]) { + *wpan_dev = tmp; + break; + } + } + + if (!*wpan_dev) { + err = -ENODEV; + goto out_unlock; + } + } + + return 0; + out_unlock: + rtnl_unlock(); + return err; +} + +static void +nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev) +{ + rtnl_unlock(); +} +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + /* message building helper */ static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) @@ -612,6 +690,107 @@ static inline u64 wpan_dev_id(struct wpan_dev *wpan_dev) ((u64)wpan_phy_to_rdev(wpan_dev->wpan_phy)->wpan_phy_idx << 32); } +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL +#include <net/ieee802154_netdev.h> + +static int +ieee802154_llsec_send_key_id(struct sk_buff *msg, + const struct ieee802154_llsec_key_id *desc) +{ + struct nlattr *nl_dev_addr; + + if (nla_put_u32(msg, NL802154_KEY_ID_ATTR_MODE, desc->mode)) + return -ENOBUFS; + + switch (desc->mode) { + case NL802154_KEY_ID_MODE_IMPLICIT: + nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT); + if (!nl_dev_addr) + return -ENOBUFS; + + if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_PAN_ID, + desc->device_addr.pan_id) || + nla_put_u32(msg, NL802154_DEV_ADDR_ATTR_MODE, + desc->device_addr.mode)) + return -ENOBUFS; + + switch (desc->device_addr.mode) { + case NL802154_DEV_ADDR_SHORT: + if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_SHORT, + desc->device_addr.short_addr)) + return -ENOBUFS; + break; + case NL802154_DEV_ADDR_EXTENDED: + if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED, + desc->device_addr.extended_addr)) + return -ENOBUFS; + break; + default: + /* userspace should handle unknown */ + break; + } + + nla_nest_end(msg, nl_dev_addr); + break; + case NL802154_KEY_ID_MODE_INDEX: + break; + case 
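/* [Editor's note, illustrative only.] The "+ 1" stored into cb->args[0]
 * above is the usual netlink dump-resume trick: cb->args[] starts zeroed
 * and the dump callback is re-invoked until it reports no more data, so a
 * stored value of 0 must keep meaning "first call, parse the request".
 * Encoding phy index N as N + 1 keeps index 0 distinguishable:
 */
static inline unsigned long store_phy_idx(int idx)
{
	return (unsigned long)idx + 1;	/* never stores 0 for a valid idx */
}

static inline int load_phy_idx(unsigned long arg)
{
	return (int)arg - 1;	/* only called once arg != 0 */
}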
NL802154_KEY_ID_MODE_INDEX_SHORT: + /* TODO renmae short_source? */ + if (nla_put_le32(msg, NL802154_KEY_ID_ATTR_SOURCE_SHORT, + desc->short_source)) + return -ENOBUFS; + break; + case NL802154_KEY_ID_MODE_INDEX_EXTENDED: + if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED, + desc->extended_source)) + return -ENOBUFS; + break; + default: + /* userspace should handle unknown */ + break; + } + + /* TODO key_id to key_idx ? Check naming */ + if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) { + if (nla_put_u8(msg, NL802154_KEY_ID_ATTR_INDEX, desc->id)) + return -ENOBUFS; + } + + return 0; +} + +static int nl802154_get_llsec_params(struct sk_buff *msg, + struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev) +{ + struct nlattr *nl_key_id; + struct ieee802154_llsec_params params; + int ret; + + ret = rdev_get_llsec_params(rdev, wpan_dev, ¶ms); + if (ret < 0) + return ret; + + if (nla_put_u8(msg, NL802154_ATTR_SEC_ENABLED, params.enabled) || + nla_put_u32(msg, NL802154_ATTR_SEC_OUT_LEVEL, params.out_level) || + nla_put_be32(msg, NL802154_ATTR_SEC_FRAME_COUNTER, + params.frame_counter)) + return -ENOBUFS; + + nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID); + if (!nl_key_id) + return -ENOBUFS; + + ret = ieee802154_llsec_send_key_id(msg, ¶ms.out_key); + if (ret < 0) + return ret; + + nla_nest_end(msg, nl_key_id); + + return 0; +} +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + static int nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg802154_registered_device *rdev, @@ -663,6 +842,11 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq)) goto nla_put_failure; +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0) + goto nla_put_failure; +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + genlmsg_end(msg, hdr); return 0; @@ -753,10 +937,8 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - /* TODO add nla_get_le64 to netlink */ if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) - extended_addr = (__force __le64)nla_get_u64( - info->attrs[NL802154_ATTR_EXTENDED_ADDR]); + extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]); if (!rdev->ops->add_virtual_intf) return -EOPNOTSUPP; @@ -1075,6 +1257,838 @@ nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info) return rdev_set_ackreq_default(rdev, wpan_dev, ackreq); } +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL +static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = { + [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 }, + [NL802154_DEV_ADDR_ATTR_MODE] = { .type = NLA_U32 }, + [NL802154_DEV_ADDR_ATTR_SHORT] = { .type = NLA_U16 }, + [NL802154_DEV_ADDR_ATTR_EXTENDED] = { .type = NLA_U64 }, +}; + +static int +ieee802154_llsec_parse_dev_addr(struct nlattr *nla, + struct ieee802154_addr *addr) +{ + struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1]; + + if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, + nl802154_dev_addr_policy)) + return -EINVAL; + + if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] && + !attrs[NL802154_DEV_ADDR_ATTR_MODE] && + !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] || + attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])) + return -EINVAL; + + addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]); + addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]); + switch (addr->mode) { + 
case NL802154_DEV_ADDR_SHORT: + addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]); + break; + case NL802154_DEV_ADDR_EXTENDED: + addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]); + break; + default: + return -EINVAL; + } + + return 0; +} + +static const struct nla_policy nl802154_key_id_policy[NL802154_KEY_ID_ATTR_MAX + 1] = { + [NL802154_KEY_ID_ATTR_MODE] = { .type = NLA_U32 }, + [NL802154_KEY_ID_ATTR_INDEX] = { .type = NLA_U8 }, + [NL802154_KEY_ID_ATTR_IMPLICIT] = { .type = NLA_NESTED }, + [NL802154_KEY_ID_ATTR_SOURCE_SHORT] = { .type = NLA_U32 }, + [NL802154_KEY_ID_ATTR_SOURCE_EXTENDED] = { .type = NLA_U64 }, +}; + +static int +ieee802154_llsec_parse_key_id(struct nlattr *nla, + struct ieee802154_llsec_key_id *desc) +{ + struct nlattr *attrs[NL802154_KEY_ID_ATTR_MAX + 1]; + + if (!nla || nla_parse_nested(attrs, NL802154_KEY_ID_ATTR_MAX, nla, + nl802154_key_id_policy)) + return -EINVAL; + + if (!attrs[NL802154_KEY_ID_ATTR_MODE]) + return -EINVAL; + + desc->mode = nla_get_u32(attrs[NL802154_KEY_ID_ATTR_MODE]); + switch (desc->mode) { + case NL802154_KEY_ID_MODE_IMPLICIT: + if (!attrs[NL802154_KEY_ID_ATTR_IMPLICIT]) + return -EINVAL; + + if (ieee802154_llsec_parse_dev_addr(attrs[NL802154_KEY_ID_ATTR_IMPLICIT], + &desc->device_addr) < 0) + return -EINVAL; + break; + case NL802154_KEY_ID_MODE_INDEX: + break; + case NL802154_KEY_ID_MODE_INDEX_SHORT: + if (!attrs[NL802154_KEY_ID_ATTR_SOURCE_SHORT]) + return -EINVAL; + + desc->short_source = nla_get_le32(attrs[NL802154_KEY_ID_ATTR_SOURCE_SHORT]); + break; + case NL802154_KEY_ID_MODE_INDEX_EXTENDED: + if (!attrs[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED]) + return -EINVAL; + + desc->extended_source = nla_get_le64(attrs[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED]); + break; + default: + return -EINVAL; + } + + if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) { + if (!attrs[NL802154_KEY_ID_ATTR_INDEX]) + return -EINVAL; + + /* TODO change id to idx */ + desc->id = nla_get_u8(attrs[NL802154_KEY_ID_ATTR_INDEX]); + } + + return 0; +} + +static int nl802154_set_llsec_params(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct ieee802154_llsec_params params; + u32 changed = 0; + int ret; + + if (info->attrs[NL802154_ATTR_SEC_ENABLED]) { + u8 enabled; + + enabled = nla_get_u8(info->attrs[NL802154_ATTR_SEC_ENABLED]); + if (enabled != 0 && enabled != 1) + return -EINVAL; + + params.enabled = nla_get_u8(info->attrs[NL802154_ATTR_SEC_ENABLED]); + changed |= IEEE802154_LLSEC_PARAM_ENABLED; + } + + if (info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID]) { + ret = ieee802154_llsec_parse_key_id(info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID], + ¶ms.out_key); + if (ret < 0) + return ret; + + changed |= IEEE802154_LLSEC_PARAM_OUT_KEY; + } + + if (info->attrs[NL802154_ATTR_SEC_OUT_LEVEL]) { + params.out_level = nla_get_u32(info->attrs[NL802154_ATTR_SEC_OUT_LEVEL]); + if (params.out_level > NL802154_SECLEVEL_MAX) + return -EINVAL; + + changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL; + } + + if (info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER]) { + params.frame_counter = nla_get_be32(info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER]); + changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER; + } + + return rdev_set_llsec_params(rdev, wpan_dev, ¶ms, changed); +} + +static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, + struct cfg802154_registered_device *rdev, + 
struct net_device *dev, + const struct ieee802154_llsec_key_entry *key) +{ + void *hdr; + u32 commands[NL802154_CMD_FRAME_NR_IDS / 32]; + struct nlattr *nl_key, *nl_key_id; + + hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); + if (!hdr) + return -1; + + if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + nl_key = nla_nest_start(msg, NL802154_ATTR_SEC_KEY); + if (!nl_key) + goto nla_put_failure; + + nl_key_id = nla_nest_start(msg, NL802154_KEY_ATTR_ID); + if (!nl_key_id) + goto nla_put_failure; + + if (ieee802154_llsec_send_key_id(msg, &key->id) < 0) + goto nla_put_failure; + + nla_nest_end(msg, nl_key_id); + + if (nla_put_u8(msg, NL802154_KEY_ATTR_USAGE_FRAMES, + key->key->frame_types)) + goto nla_put_failure; + + if (key->key->frame_types & BIT(NL802154_FRAME_CMD)) { + /* TODO for each nested */ + memset(commands, 0, sizeof(commands)); + commands[7] = key->key->cmd_frame_ids; + if (nla_put(msg, NL802154_KEY_ATTR_USAGE_CMDS, + sizeof(commands), commands)) + goto nla_put_failure; + } + + if (nla_put(msg, NL802154_KEY_ATTR_BYTES, NL802154_KEY_SIZE, + key->key->key)) + goto nla_put_failure; + + nla_nest_end(msg, nl_key); + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int +nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cfg802154_registered_device *rdev = NULL; + struct ieee802154_llsec_key_entry *key; + struct ieee802154_llsec_table *table; + struct wpan_dev *wpan_dev; + int err; + + err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); + if (err) + return err; + + if (!wpan_dev->netdev) { + err = -EINVAL; + goto out_err; + } + + rdev_lock_llsec_table(rdev, wpan_dev); + rdev_get_llsec_table(rdev, wpan_dev, &table); + + /* TODO make it like station dump */ + if (cb->args[2]) + goto out; + + list_for_each_entry(key, &table->keys, list) { + if (nl802154_send_key(skb, NL802154_CMD_NEW_SEC_KEY, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + rdev, wpan_dev->netdev, key) < 0) { + /* TODO */ + err = -EIO; + rdev_unlock_llsec_table(rdev, wpan_dev); + goto out_err; + } + } + + cb->args[2] = 1; + +out: + rdev_unlock_llsec_table(rdev, wpan_dev); + err = skb->len; +out_err: + nl802154_finish_wpan_dev_dump(rdev); + + return err; +} + +static const struct nla_policy nl802154_key_policy[NL802154_KEY_ATTR_MAX + 1] = { + [NL802154_KEY_ATTR_ID] = { NLA_NESTED }, + /* TODO handle it as for_each_nested and NLA_FLAG? */ + [NL802154_KEY_ATTR_USAGE_FRAMES] = { NLA_U8 }, + /* TODO handle it as for_each_nested, not static array? 
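/* [Editor's note, an inference from the code around it, not authoritative.]
 * NL802154_KEY_ATTR_USAGE_CMDS is a 256-bit bitmap carried as u32
 * commands[8], apparently ordered most-significant word first: command
 * frame IDs 0..31 (which cover all the classic MAC commands) land in
 * commands[7], which is why only index 7 is copied in and out and
 * commands[0..6] are required to stay zero. Standalone membership test
 * under that assumed layout:
 */
#include <stdint.h>

static int cmd_id_is_set(const uint32_t cmds[8], unsigned int id /* 0..255 */)
{
	return (cmds[7 - id / 32] >> (id % 32)) & 1;	/* id 7 -> cmds[7] bit 7 */
}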
*/ + [NL802154_KEY_ATTR_USAGE_CMDS] = { .len = NL802154_CMD_FRAME_NR_IDS / 8 }, + [NL802154_KEY_ATTR_BYTES] = { .len = NL802154_KEY_SIZE }, +}; + +static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; + struct ieee802154_llsec_key key = { }; + struct ieee802154_llsec_key_id id = { }; + u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { }; + + if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, + info->attrs[NL802154_ATTR_SEC_KEY], + nl802154_key_policy)) + return -EINVAL; + + if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] || + !attrs[NL802154_KEY_ATTR_BYTES]) + return -EINVAL; + + if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) + return -ENOBUFS; + + key.frame_types = nla_get_u8(attrs[NL802154_KEY_ATTR_USAGE_FRAMES]); + if (key.frame_types > BIT(NL802154_FRAME_MAX) || + ((key.frame_types & BIT(NL802154_FRAME_CMD)) && + !attrs[NL802154_KEY_ATTR_USAGE_CMDS])) + return -EINVAL; + + if (attrs[NL802154_KEY_ATTR_USAGE_CMDS]) { + /* TODO for each nested */ + nla_memcpy(commands, attrs[NL802154_KEY_ATTR_USAGE_CMDS], + NL802154_CMD_FRAME_NR_IDS / 8); + + /* TODO understand the -EINVAL logic here? last condition */ + if (commands[0] || commands[1] || commands[2] || commands[3] || + commands[4] || commands[5] || commands[6] || + commands[7] > BIT(NL802154_CMD_FRAME_MAX)) + return -EINVAL; + + key.cmd_frame_ids = commands[7]; + } else { + key.cmd_frame_ids = 0; + } + + nla_memcpy(key.key, attrs[NL802154_KEY_ATTR_BYTES], NL802154_KEY_SIZE); + + if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) + return -ENOBUFS; + + return rdev_add_llsec_key(rdev, wpan_dev, &id, &key); +} + +static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1]; + struct ieee802154_llsec_key_id id; + + if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX, + info->attrs[NL802154_ATTR_SEC_KEY], + nl802154_key_policy)) + return -EINVAL; + + if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0) + return -ENOBUFS; + + return rdev_del_llsec_key(rdev, wpan_dev, &id); +} + +static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, + struct cfg802154_registered_device *rdev, + struct net_device *dev, + const struct ieee802154_llsec_device *dev_desc) +{ + void *hdr; + struct nlattr *nl_device; + + hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); + if (!hdr) + return -1; + + if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + nl_device = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE); + if (!nl_device) + goto nla_put_failure; + + if (nla_put_u32(msg, NL802154_DEV_ATTR_FRAME_COUNTER, + dev_desc->frame_counter) || + nla_put_le16(msg, NL802154_DEV_ATTR_PAN_ID, dev_desc->pan_id) || + nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR, + dev_desc->short_addr) || + nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR, + dev_desc->hwaddr) || + nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT, + dev_desc->seclevel_exempt) || + nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode)) + goto nla_put_failure; + + nla_nest_end(msg, nl_device); + 
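Every dump helper in this llsec series, nl802154_send_key() and nl802154_send_device() included, builds its reply the same way: a generic netlink header, the NL802154_ATTR_IFINDEX attribute, then one nested container per object, opened with nla_nest_start() and closed with nla_nest_end() once the inner attributes have been written. The closing step exists because a netlink attribute header carries the attribute's total length, which is only known after the payload is complete, so nla_nest_end() back-patches the header reserved by nla_nest_start(). A minimal standalone sketch of that back-patching idea, using toy TLV types rather than the kernel or libnl API (tlv_buf, put_hdr, nest_start and friends are all hypothetical):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy TLV buffer; illustrative only, not the kernel nla_* API. */
struct tlv_buf {
	uint8_t data[256];
	size_t len;
};

static void put_hdr(struct tlv_buf *b, uint16_t len, uint16_t type)
{
	memcpy(b->data + b->len, &len, 2);
	memcpy(b->data + b->len + 2, &type, 2);
	b->len += 4;
}

/* like nla_put_u32(): a complete attribute, length known up front */
static void put_u32(struct tlv_buf *b, uint16_t type, uint32_t val)
{
	put_hdr(b, 8, type);
	memcpy(b->data + b->len, &val, 4);
	b->len += 4;
}

/* like nla_nest_start(): emit a header with a dummy length, remember it */
static size_t nest_start(struct tlv_buf *b, uint16_t type)
{
	size_t off = b->len;

	put_hdr(b, 0, type);
	return off;
}

/* like nla_nest_end(): back-patch the container length once it is known */
static void nest_end(struct tlv_buf *b, size_t off)
{
	uint16_t len = (uint16_t)(b->len - off);

	memcpy(b->data + off, &len, 2);
}

int main(void)
{
	struct tlv_buf b = { .len = 0 };
	size_t nest = nest_start(&b, 1);	/* outer container */

	put_u32(&b, 2, 0xdeadbeef);		/* nested attribute */
	nest_end(&b, nest);
	printf("message length: %zu bytes\n", b.len);	/* prints 12 */
	return 0;
}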
genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int +nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cfg802154_registered_device *rdev = NULL; + struct ieee802154_llsec_device *dev; + struct ieee802154_llsec_table *table; + struct wpan_dev *wpan_dev; + int err; + + err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); + if (err) + return err; + + if (!wpan_dev->netdev) { + err = -EINVAL; + goto out_err; + } + + rdev_lock_llsec_table(rdev, wpan_dev); + rdev_get_llsec_table(rdev, wpan_dev, &table); + + /* TODO make it like station dump */ + if (cb->args[2]) + goto out; + + list_for_each_entry(dev, &table->devices, list) { + if (nl802154_send_device(skb, NL802154_CMD_NEW_SEC_LEVEL, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + rdev, wpan_dev->netdev, dev) < 0) { + /* TODO */ + err = -EIO; + rdev_unlock_llsec_table(rdev, wpan_dev); + goto out_err; + } + } + + cb->args[2] = 1; + +out: + rdev_unlock_llsec_table(rdev, wpan_dev); + err = skb->len; +out_err: + nl802154_finish_wpan_dev_dump(rdev); + + return err; +} + +static const struct nla_policy nl802154_dev_policy[NL802154_DEV_ATTR_MAX + 1] = { + [NL802154_DEV_ATTR_FRAME_COUNTER] = { NLA_U32 }, + [NL802154_DEV_ATTR_PAN_ID] = { .type = NLA_U16 }, + [NL802154_DEV_ATTR_SHORT_ADDR] = { .type = NLA_U16 }, + [NL802154_DEV_ATTR_EXTENDED_ADDR] = { .type = NLA_U64 }, + [NL802154_DEV_ATTR_SECLEVEL_EXEMPT] = { NLA_U8 }, + [NL802154_DEV_ATTR_KEY_MODE] = { NLA_U32 }, +}; + +static int +ieee802154_llsec_parse_device(struct nlattr *nla, + struct ieee802154_llsec_device *dev) +{ + struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; + + if (!nla || nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, nla, + nl802154_dev_policy)) + return -EINVAL; + + memset(dev, 0, sizeof(*dev)); + + if (!attrs[NL802154_DEV_ATTR_FRAME_COUNTER] || + !attrs[NL802154_DEV_ATTR_PAN_ID] || + !attrs[NL802154_DEV_ATTR_SHORT_ADDR] || + !attrs[NL802154_DEV_ATTR_EXTENDED_ADDR] || + !attrs[NL802154_DEV_ATTR_SECLEVEL_EXEMPT] || + !attrs[NL802154_DEV_ATTR_KEY_MODE]) + return -EINVAL; + + /* TODO be32 */ + dev->frame_counter = nla_get_u32(attrs[NL802154_DEV_ATTR_FRAME_COUNTER]); + dev->pan_id = nla_get_le16(attrs[NL802154_DEV_ATTR_PAN_ID]); + dev->short_addr = nla_get_le16(attrs[NL802154_DEV_ATTR_SHORT_ADDR]); + /* TODO rename hwaddr to extended_addr */ + dev->hwaddr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]); + dev->seclevel_exempt = nla_get_u8(attrs[NL802154_DEV_ATTR_SECLEVEL_EXEMPT]); + dev->key_mode = nla_get_u32(attrs[NL802154_DEV_ATTR_KEY_MODE]); + + if (dev->key_mode > NL802154_DEVKEY_MAX || + (dev->seclevel_exempt != 0 && dev->seclevel_exempt != 1)) + return -EINVAL; + + return 0; +} + +static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct ieee802154_llsec_device dev_desc; + + if (ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE], + &dev_desc) < 0) + return -EINVAL; + + return rdev_add_device(rdev, wpan_dev, &dev_desc); +} + +static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1]; + __le64 
extended_addr; + + if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX, + info->attrs[NL802154_ATTR_SEC_DEVICE], + nl802154_dev_policy)) + return -EINVAL; + + if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]) + return -EINVAL; + + extended_addr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]); + return rdev_del_device(rdev, wpan_dev, extended_addr); +} + +static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, + struct cfg802154_registered_device *rdev, + struct net_device *dev, __le64 extended_addr, + const struct ieee802154_llsec_device_key *devkey) +{ + void *hdr; + struct nlattr *nl_devkey, *nl_key_id; + + hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); + if (!hdr) + return -1; + + if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + nl_devkey = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY); + if (!nl_devkey) + goto nla_put_failure; + + if (nla_put_le64(msg, NL802154_DEVKEY_ATTR_EXTENDED_ADDR, + extended_addr) || + nla_put_u32(msg, NL802154_DEVKEY_ATTR_FRAME_COUNTER, + devkey->frame_counter)) + goto nla_put_failure; + + nl_key_id = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID); + if (!nl_key_id) + goto nla_put_failure; + + if (ieee802154_llsec_send_key_id(msg, &devkey->key_id) < 0) + goto nla_put_failure; + + nla_nest_end(msg, nl_key_id); + nla_nest_end(msg, nl_devkey); + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int +nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cfg802154_registered_device *rdev = NULL; + struct ieee802154_llsec_device_key *kpos; + struct ieee802154_llsec_device *dpos; + struct ieee802154_llsec_table *table; + struct wpan_dev *wpan_dev; + int err; + + err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); + if (err) + return err; + + if (!wpan_dev->netdev) { + err = -EINVAL; + goto out_err; + } + + rdev_lock_llsec_table(rdev, wpan_dev); + rdev_get_llsec_table(rdev, wpan_dev, &table); + + /* TODO make it like station dump */ + if (cb->args[2]) + goto out; + + /* TODO look if remove devkey and do some nested attribute */ + list_for_each_entry(dpos, &table->devices, list) { + list_for_each_entry(kpos, &dpos->keys, list) { + if (nl802154_send_devkey(skb, + NL802154_CMD_NEW_SEC_LEVEL, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, rdev, + wpan_dev->netdev, + dpos->hwaddr, + kpos) < 0) { + /* TODO */ + err = -EIO; + rdev_unlock_llsec_table(rdev, wpan_dev); + goto out_err; + } + } + } + + cb->args[2] = 1; + +out: + rdev_unlock_llsec_table(rdev, wpan_dev); + err = skb->len; +out_err: + nl802154_finish_wpan_dev_dump(rdev); + + return err; +} + +static const struct nla_policy nl802154_devkey_policy[NL802154_DEVKEY_ATTR_MAX + 1] = { + [NL802154_DEVKEY_ATTR_FRAME_COUNTER] = { NLA_U32 }, + [NL802154_DEVKEY_ATTR_EXTENDED_ADDR] = { NLA_U64 }, + [NL802154_DEVKEY_ATTR_ID] = { NLA_NESTED }, +}; + +static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1]; + struct ieee802154_llsec_device_key key; + __le64 extended_addr; + + if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] || + nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, + info->attrs[NL802154_ATTR_SEC_DEVKEY], + nl802154_devkey_policy) < 0) + return -EINVAL; + + if 
(!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER] || + !attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) + return -EINVAL; + + /* TODO change key.id ? */ + if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID], + &key.key_id) < 0) + return -ENOBUFS; + + /* TODO be32 */ + key.frame_counter = nla_get_u32(attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER]); + /* TODO change naming hwaddr -> extended_addr + * check unique identifier short+pan OR extended_addr + */ + extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]); + return rdev_add_devkey(rdev, wpan_dev, extended_addr, &key); +} + +static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1]; + struct ieee802154_llsec_device_key key; + __le64 extended_addr; + + if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, + info->attrs[NL802154_ATTR_SEC_DEVKEY], + nl802154_devkey_policy)) + return -EINVAL; + + if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]) + return -EINVAL; + + /* TODO change key.id ? */ + if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID], + &key.key_id) < 0) + return -ENOBUFS; + + /* TODO change naming hwaddr -> extended_addr + * check unique identifier short+pan OR extended_addr + */ + extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]); + return rdev_del_devkey(rdev, wpan_dev, extended_addr, &key); +} + +static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, + struct cfg802154_registered_device *rdev, + struct net_device *dev, + const struct ieee802154_llsec_seclevel *sl) +{ + void *hdr; + struct nlattr *nl_seclevel; + + hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); + if (!hdr) + return -1; + + if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) + goto nla_put_failure; + + nl_seclevel = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL); + if (!nl_seclevel) + goto nla_put_failure; + + if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_FRAME, sl->frame_type) || + nla_put_u32(msg, NL802154_SECLEVEL_ATTR_LEVELS, sl->sec_levels) || + nla_put_u8(msg, NL802154_SECLEVEL_ATTR_DEV_OVERRIDE, + sl->device_override)) + goto nla_put_failure; + + if (sl->frame_type == NL802154_FRAME_CMD) { + if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_CMD_FRAME, + sl->cmd_frame_id)) + goto nla_put_failure; + } + + nla_nest_end(msg, nl_seclevel); + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int +nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct cfg802154_registered_device *rdev = NULL; + struct ieee802154_llsec_seclevel *sl; + struct ieee802154_llsec_table *table; + struct wpan_dev *wpan_dev; + int err; + + err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev); + if (err) + return err; + + if (!wpan_dev->netdev) { + err = -EINVAL; + goto out_err; + } + + rdev_lock_llsec_table(rdev, wpan_dev); + rdev_get_llsec_table(rdev, wpan_dev, &table); + + /* TODO make it like station dump */ + if (cb->args[2]) + goto out; + + list_for_each_entry(sl, &table->security_levels, list) { + if (nl802154_send_seclevel(skb, NL802154_CMD_NEW_SEC_LEVEL, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + rdev, wpan_dev->netdev, sl) < 0) { + /* TODO */ + err = -EIO; + rdev_unlock_llsec_table(rdev, wpan_dev); + goto 
out_err; + } + } + + cb->args[2] = 1; + +out: + rdev_unlock_llsec_table(rdev, wpan_dev); + err = skb->len; +out_err: + nl802154_finish_wpan_dev_dump(rdev); + + return err; +} + +static const struct nla_policy nl802154_seclevel_policy[NL802154_SECLEVEL_ATTR_MAX + 1] = { + [NL802154_SECLEVEL_ATTR_LEVELS] = { .type = NLA_U8 }, + [NL802154_SECLEVEL_ATTR_FRAME] = { .type = NLA_U32 }, + [NL802154_SECLEVEL_ATTR_CMD_FRAME] = { .type = NLA_U32 }, + [NL802154_SECLEVEL_ATTR_DEV_OVERRIDE] = { .type = NLA_U8 }, +}; + +static int +llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl) +{ + struct nlattr *attrs[NL802154_SECLEVEL_ATTR_MAX + 1]; + + if (!nla || nla_parse_nested(attrs, NL802154_SECLEVEL_ATTR_MAX, nla, + nl802154_seclevel_policy)) + return -EINVAL; + + memset(sl, 0, sizeof(*sl)); + + if (!attrs[NL802154_SECLEVEL_ATTR_LEVELS] || + !attrs[NL802154_SECLEVEL_ATTR_FRAME] || + !attrs[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE]) + return -EINVAL; + + sl->sec_levels = nla_get_u8(attrs[NL802154_SECLEVEL_ATTR_LEVELS]); + sl->frame_type = nla_get_u32(attrs[NL802154_SECLEVEL_ATTR_FRAME]); + sl->device_override = nla_get_u8(attrs[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE]); + if (sl->frame_type > NL802154_FRAME_MAX || + (sl->device_override != 0 && sl->device_override != 1)) + return -EINVAL; + + if (sl->frame_type == NL802154_FRAME_CMD) { + if (!attrs[NL802154_SECLEVEL_ATTR_CMD_FRAME]) + return -EINVAL; + + sl->cmd_frame_id = nla_get_u32(attrs[NL802154_SECLEVEL_ATTR_CMD_FRAME]); + if (sl->cmd_frame_id > NL802154_CMD_FRAME_MAX) + return -EINVAL; + } + + return 0; +} + +static int nl802154_add_llsec_seclevel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct ieee802154_llsec_seclevel sl; + + if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], + &sl) < 0) + return -EINVAL; + + return rdev_add_seclevel(rdev, wpan_dev, &sl); +} + +static int nl802154_del_llsec_seclevel(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wpan_dev *wpan_dev = dev->ieee802154_ptr; + struct ieee802154_llsec_seclevel sl; + + if (!info->attrs[NL802154_ATTR_SEC_LEVEL] || + llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], + &sl) < 0) + return -EINVAL; + + return rdev_del_seclevel(rdev, wpan_dev, &sl); +} +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + #define NL802154_FLAG_NEED_WPAN_PHY 0x01 #define NL802154_FLAG_NEED_NETDEV 0x02 #define NL802154_FLAG_NEED_RTNL 0x04 @@ -1289,6 +2303,119 @@ static const struct genl_ops nl802154_ops[] = { .internal_flags = NL802154_FLAG_NEED_NETDEV | NL802154_FLAG_NEED_RTNL, }, +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + { + .cmd = NL802154_CMD_SET_SEC_PARAMS, + .doit = nl802154_set_llsec_params, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_GET_SEC_KEY, + /* TODO .doit by matching key id? 
*/ + .dumpit = nl802154_dump_llsec_key, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_NEW_SEC_KEY, + .doit = nl802154_add_llsec_key, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_DEL_SEC_KEY, + .doit = nl802154_del_llsec_key, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + /* TODO unique identifier must short+pan OR extended_addr */ + { + .cmd = NL802154_CMD_GET_SEC_DEV, + /* TODO .doit by matching extended_addr? */ + .dumpit = nl802154_dump_llsec_dev, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_NEW_SEC_DEV, + .doit = nl802154_add_llsec_dev, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_DEL_SEC_DEV, + .doit = nl802154_del_llsec_dev, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + /* TODO remove complete devkey, put it as nested? */ + { + .cmd = NL802154_CMD_GET_SEC_DEVKEY, + /* TODO doit by matching ??? */ + .dumpit = nl802154_dump_llsec_devkey, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_NEW_SEC_DEVKEY, + .doit = nl802154_add_llsec_devkey, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_DEL_SEC_DEVKEY, + .doit = nl802154_del_llsec_devkey, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_GET_SEC_LEVEL, + /* TODO .doit by matching frame_type? */ + .dumpit = nl802154_dump_llsec_seclevel, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_NEW_SEC_LEVEL, + .doit = nl802154_add_llsec_seclevel, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, + { + .cmd = NL802154_CMD_DEL_SEC_LEVEL, + /* TODO match frame_type only? 
*/ + .doit = nl802154_del_llsec_seclevel, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_NETDEV | + NL802154_FLAG_NEED_RTNL, + }, +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; /* initialisation/exit functions */ diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 03b357501cc5..4441c63b3ea6 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -208,4 +208,113 @@ rdev_set_ackreq_default(struct cfg802154_registered_device *rdev, return ret; } +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL +/* TODO this is already a nl802154, so move into ieee802154 */ +static inline void +rdev_get_llsec_table(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_llsec_table **table) +{ + rdev->ops->get_llsec_table(&rdev->wpan_phy, wpan_dev, table); +} + +static inline void +rdev_lock_llsec_table(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev) +{ + rdev->ops->lock_llsec_table(&rdev->wpan_phy, wpan_dev); +} + +static inline void +rdev_unlock_llsec_table(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev) +{ + rdev->ops->unlock_llsec_table(&rdev->wpan_phy, wpan_dev); +} + +static inline int +rdev_get_llsec_params(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + struct ieee802154_llsec_params *params) +{ + return rdev->ops->get_llsec_params(&rdev->wpan_phy, wpan_dev, params); +} + +static inline int +rdev_set_llsec_params(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_params *params, + u32 changed) +{ + return rdev->ops->set_llsec_params(&rdev->wpan_phy, wpan_dev, params, + changed); +} + +static inline int +rdev_add_llsec_key(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + return rdev->ops->add_llsec_key(&rdev->wpan_phy, wpan_dev, id, key); +} + +static inline int +rdev_del_llsec_key(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_key_id *id) +{ + return rdev->ops->del_llsec_key(&rdev->wpan_phy, wpan_dev, id); +} + +static inline int +rdev_add_seclevel(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_seclevel *sl) +{ + return rdev->ops->add_seclevel(&rdev->wpan_phy, wpan_dev, sl); +} + +static inline int +rdev_del_seclevel(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_seclevel *sl) +{ + return rdev->ops->del_seclevel(&rdev->wpan_phy, wpan_dev, sl); +} + +static inline int +rdev_add_device(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_device *dev_desc) +{ + return rdev->ops->add_device(&rdev->wpan_phy, wpan_dev, dev_desc); +} + +static inline int +rdev_del_device(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, __le64 extended_addr) +{ + return rdev->ops->del_device(&rdev->wpan_phy, wpan_dev, extended_addr); +} + +static inline int +rdev_add_devkey(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, __le64 extended_addr, + const struct ieee802154_llsec_device_key *devkey) +{ + return rdev->ops->add_devkey(&rdev->wpan_phy, wpan_dev, extended_addr, + devkey); +} + +static inline int +rdev_del_devkey(struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, __le64 
extended_addr, + const struct ieee802154_llsec_device_key *devkey) +{ + return rdev->ops->del_devkey(&rdev->wpan_phy, wpan_dev, extended_addr, + devkey); +} +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + #endif /* __CFG802154_RDEV_OPS */ diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index b6eacf30ee7a..a548be247e15 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -273,7 +273,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) goto out; } - mtu = dev->mtu; + mtu = IEEE802154_MTU; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); if (size > mtu) { @@ -637,7 +637,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = -ENXIO; goto out; } - mtu = dev->mtu; + mtu = IEEE802154_MTU; pr_debug("name = %s, mtu = %u\n", dev->name, mtu); if (size > mtu) { @@ -676,8 +676,8 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) cb->seclevel = ro->seclevel; cb->seclevel_override = ro->seclevel_override; - err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr, - ro->bound ? &ro->src_addr : NULL, size); + err = wpan_dev_hard_header(skb, dev, &dst_addr, + ro->bound ? &ro->src_addr : NULL, size); if (err < 0) goto out_skb; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 89aacb630a53..c29809f765dc 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,6 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ + tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1d0c3adb6f34..11c4ca13ec3b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,7 +119,7 @@ #ifdef CONFIG_IP_MROUTE #include <linux/mroute.h> #endif -#include <net/vrf.h> +#include <net/l3mdev.h> /* The inetsw table contains everything that inet_create needs to @@ -219,17 +219,13 @@ int inet_listen(struct socket *sock, int backlog) * shutdown() (rather than close()). */ if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && - !inet_csk(sk)->icsk_accept_queue.fastopenq) { + !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - err = fastopen_init_queue(sk, backlog); + fastopen_queue_tune(sk, backlog); else if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT2) != 0) - err = fastopen_init_queue(sk, + fastopen_queue_tune(sk, ((uint)sysctl_tcp_fastopen) >> 16); - else - err = 0; - if (err) - goto out; tcp_fastopen_init_key_once(true); } @@ -450,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id; + tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); /* Not specified by any standard per-se, however it breaks too @@ -1043,22 +1039,16 @@ void inet_register_protosw(struct inet_protosw *p) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ - answer = NULL; last_perm = &inetsw[p->type]; list_for_each(lh, &inetsw[p->type]) { answer = list_entry(lh, struct inet_protosw, list); - /* Check only the non-wild match. 
*/ - if (INET_PROTOSW_PERMANENT & answer->flags) { - if (protocol == answer->protocol) - break; - last_perm = lh; - } - - answer = NULL; + if ((INET_PROTOSW_PERMANENT & answer->flags) == 0) + break; + if (protocol == answer->protocol) + goto out_permanent; + last_perm = lh; } - if (answer) - goto out_permanent; /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 0c9c3482e419..59b3e0e8fd51 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -624,14 +624,20 @@ out: } EXPORT_SYMBOL(arp_create); +static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + /* * Send an arp packet. */ void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb, - NULL, skb->dev, dev_queue_xmit_sk); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, + dev_net(skb->dev), NULL, skb, NULL, skb->dev, + arp_xmit_finish); } EXPORT_SYMBOL(arp_xmit); @@ -639,7 +645,7 @@ EXPORT_SYMBOL(arp_xmit); * Process an arp request. */ -static int arp_process(struct sock *sk, struct sk_buff *skb) +static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -651,7 +657,6 @@ static int arp_process(struct sock *sk, struct sk_buff *skb) u16 dev_type = dev->type; int addr_type; struct neighbour *n; - struct net *net = dev_net(dev); struct dst_entry *reply_dst = NULL; bool is_garp = false; @@ -872,7 +877,7 @@ out_free_dst: static void parp_redo(struct sk_buff *skb) { - arp_process(NULL, skb); + arp_process(dev_net(skb->dev), NULL, skb); } @@ -905,8 +910,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, - dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, + dev_net(dev), NULL, skb, dev, NULL, + arp_process); consumeskb: consume_skb(skb); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2d9cb1748f81..cebd9d31e65a 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1644,7 +1644,8 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); } -static size_t inet_get_link_af_size(const struct net_device *dev) +static size_t inet_get_link_af_size(const struct net_device *dev, + u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); @@ -1654,7 +1655,8 @@ static size_t inet_get_link_af_size(const struct net_device *dev) return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */ } -static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev, + u32 ext_filter_mask) { struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr); struct nlattr *nla; @@ -2397,4 +2399,3 @@ void __init devinet_init(void) rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, inet_netconf_dump_devconf, NULL); } - diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 690bcbc59f26..cc8f3e506cde 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -45,7 +45,7 @@ #include <net/ip_fib.h> #include <net/rtnetlink.h> #include <net/xfrm.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #include <trace/events/fib.h> #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -255,7 +255,7 @@ 
EXPORT_SYMBOL(inet_addr_type); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, dev, addr, rt_table); } @@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr) { - u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL; + u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL; return __inet_dev_addr_type(net, NULL, addr, rt_table); } @@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, bool dev_match; fl4.flowi4_oif = 0; - fl4.flowi4_iif = vrf_master_ifindex_rcu(dev); + fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); if (!fl4.flowi4_iif) fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; fl4.daddr = src; @@ -367,7 +367,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (nh->nh_dev == dev) { dev_match = true; break; - } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { + } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { dev_match = true; break; } @@ -804,7 +804,7 @@ out: static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { struct net *net = dev_net(ifa->ifa_dev->dev); - u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev); + u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev); struct fib_table *tb; struct fib_config cfg = { .fc_protocol = RTPROT_KERNEL, @@ -867,9 +867,10 @@ void fib_add_ifaddr(struct in_ifaddr *ifa) if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) && (prefix != addr || ifa->ifa_prefixlen < 32)) { - fib_magic(RTM_NEWROUTE, - dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, - prefix, ifa->ifa_prefixlen, prim); + if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) + fib_magic(RTM_NEWROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + prefix, ifa->ifa_prefixlen, prim); /* Add network specific broadcasts, when it takes a sense */ if (ifa->ifa_prefixlen < 31) { @@ -914,9 +915,10 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) } } else if (!ipv4_is_zeronet(any) && (any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { - fib_magic(RTM_DELROUTE, - dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, - any, ifa->ifa_prefixlen, prim); + if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE)) + fib_magic(RTM_DELROUTE, + dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST, + any, ifa->ifa_prefixlen, prim); subnet = 1; } @@ -1110,9 +1112,10 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, unsigned long event) +static void fib_disable_ip(struct net_device *dev, unsigned long event, + bool force) { - if (fib_sync_down_dev(dev, event)) + if (fib_sync_down_dev(dev, event, force)) fib_flush(dev_net(dev)); rt_cache_flush(dev_net(dev)); arp_ifdown(dev); @@ -1140,7 +1143,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. 
*/ - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, true); } else { rt_cache_flush(dev_net(dev)); } @@ -1157,7 +1160,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo unsigned int flags; if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, true); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1178,14 +1181,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: flags = dev_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else - fib_sync_down_dev(dev, event); + fib_sync_down_dev(dev, event, false); /* fall through */ case NETDEV_CHANGEMTU: rt_cache_flush(net); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 064bd3caaa4f..d97268e8ff10 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -57,8 +57,7 @@ static unsigned int fib_info_cnt; static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH - -static DEFINE_SPINLOCK(fib_multipath_lock); +u32 fib_multipath_secret __read_mostly; #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ @@ -532,7 +531,67 @@ errout: return ret; } -#endif +static void fib_rebalance(struct fib_info *fi) +{ + int total; + int w; + struct in_device *in_dev; + + if (fi->fib_nhs < 2) + return; + + total = 0; + for_nexthops(fi) { + if (nh->nh_flags & RTNH_F_DEAD) + continue; + + in_dev = __in_dev_get_rtnl(nh->nh_dev); + + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nh->nh_flags & RTNH_F_LINKDOWN) + continue; + + total += nh->nh_weight; + } endfor_nexthops(fi); + + w = 0; + change_nexthops(fi) { + int upper_bound; + + in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev); + + if (nexthop_nh->nh_flags & RTNH_F_DEAD) { + upper_bound = -1; + } else if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + nexthop_nh->nh_flags & RTNH_F_LINKDOWN) { + upper_bound = -1; + } else { + w += nexthop_nh->nh_weight; + upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, + total) - 1; + } + + atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); + } endfor_nexthops(fi); + + net_get_random_once(&fib_multipath_secret, + sizeof(fib_multipath_secret)); +} + +static inline void fib_add_weight(struct fib_info *fi, + const struct fib_nh *nh) +{ + fi->fib_weight += nh->nh_weight; +} + +#else /* CONFIG_IP_ROUTE_MULTIPATH */ + +#define fib_rebalance(fi) do { } while (0) +#define fib_add_weight(fi, nh) do { } while (0) + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ static int fib_encap_match(struct net *net, u16 encap_type, struct nlattr *encap, @@ -864,14 +923,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fib_prefsrc != cfg->fc_dst) { u32 tb_id = cfg->fc_table; + int rc; if (tb_id == RT_TABLE_MAIN) tb_id = RT_TABLE_LOCAL; - if (inet_addr_type_table(cfg->fc_nlinfo.nl_net, - fib_prefsrc, tb_id) != RTN_LOCAL) { - return false; + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, tb_id); + + if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, RT_TABLE_LOCAL); } + + if (rc != RTN_LOCAL) + return false; } return true; } @@ -1094,8 +1160,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { fib_info_update_nh_saddr(net, 
nexthop_nh); + fib_add_weight(fi, nexthop_nh); } endfor_nexthops(fi) + fib_rebalance(fi); + link_it: ofi = fib_find_info(fi); if (ofi) { @@ -1281,7 +1350,13 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } -int fib_sync_down_dev(struct net_device *dev, unsigned long event) +/* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host + * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed + * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed + */ +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1290,8 +1365,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) struct hlist_head *head = &fib_info_devhash[hash]; struct fib_nh *nh; - if (event == NETDEV_UNREGISTER || - event == NETDEV_DOWN) + if (force) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { @@ -1317,12 +1391,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; break; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nexthop_nh->nh_power; - nexthop_nh->nh_power = 0; - spin_unlock_bh(&fib_multipath_lock); -#endif dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1345,6 +1413,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) } ret++; } + + fib_rebalance(fi); } return ret; @@ -1440,6 +1510,13 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) if (!(dev->flags & IFF_UP)) return 0; + if (nh_flags & RTNH_F_DEAD) { + unsigned int flags = dev_get_flags(dev); + + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + nh_flags |= RTNH_F_LINKDOWN; + } + prev_fi = NULL; hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; @@ -1467,20 +1544,15 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) !__in_dev_get_rtnl(dev)) continue; alive++; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - spin_lock_bh(&fib_multipath_lock); - nexthop_nh->nh_power = 0; nexthop_nh->nh_flags &= ~nh_flags; - spin_unlock_bh(&fib_multipath_lock); -#else - nexthop_nh->nh_flags &= ~nh_flags; -#endif } endfor_nexthops(fi) if (alive > 0) { fi->fib_flags &= ~nh_flags; ret++; } + + fib_rebalance(fi); } return ret; @@ -1488,62 +1560,41 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) #ifdef CONFIG_IP_ROUTE_MULTIPATH -/* - * The algorithm is suboptimal, but it provides really - * fair weighted route distribution. - */ -void fib_select_multipath(struct fib_result *res) +void fib_select_multipath(struct fib_result *res, int hash) { struct fib_info *fi = res->fi; - struct in_device *in_dev; - int w; - - spin_lock_bh(&fib_multipath_lock); - if (fi->fib_power <= 0) { - int power = 0; - change_nexthops(fi) { - in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev); - if (nexthop_nh->nh_flags & RTNH_F_DEAD) - continue; - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nexthop_nh->nh_flags & RTNH_F_LINKDOWN) - continue; - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } endfor_nexthops(fi); - fi->fib_power = power; - if (power <= 0) { - spin_unlock_bh(&fib_multipath_lock); - /* Race condition: route has just become dead. */ - res->nh_sel = 0; - return; - } - } - - - /* w should be random number [0..fi->fib_power-1], - * it is pretty bad approximation. 
- */ - w = jiffies % fi->fib_power; + for_nexthops(fi) { + if (hash > atomic_read(&nh->nh_upper_bound)) + continue; - change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && - nexthop_nh->nh_power) { - w -= nexthop_nh->nh_power; - if (w <= 0) { - nexthop_nh->nh_power--; - fi->fib_power--; - res->nh_sel = nhsel; - spin_unlock_bh(&fib_multipath_lock); - return; - } - } + res->nh_sel = nhsel; + return; } endfor_nexthops(fi); /* Race condition: route has just become dead. */ res->nh_sel = 0; - spin_unlock_bh(&fib_multipath_lock); } #endif + +void fib_select_path(struct net *net, struct fib_result *res, + struct flowi4 *fl4, int mp_hash) +{ +#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { + if (mp_hash < 0) + mp_hash = get_hash_from_flowi4(fl4) >> 1; + + fib_select_multipath(res, mp_hash); + } + else +#endif + if (!res->prefixlen && + res->table->tb_num_default > 1 && + res->type == RTN_UNICAST && !fl4->flowi4_oif) + fib_select_default(fl4, res); + + if (!fl4->saddr) + fl4->saddr = FIB_RES_PREFSRC(net, *res); +} +EXPORT_SYMBOL_GPL(fib_select_path); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5eb8ac4089d..36e26977c908 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -96,7 +96,7 @@ #include <net/xfrm.h> #include <net/inet_common.h> #include <net/ip_fib.h> -#include <net/vrf.h> +#include <net/l3mdev.h> /* * Build xmit assembly blocks @@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, rc = false; if (icmp_global_allow()) { - int vif = vrf_master_ifindex(dst->dev); + int vif = l3mdev_master_ifindex(dst->dev); struct inet_peer *peer; peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); @@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; - fl4.flowi4_oif = vrf_master_ifindex(skb->dev); + fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) @@ -440,6 +440,22 @@ out_unlock: icmp_xmit_unlock(sk); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* Source and destination is swapped. 
See ip_multipath_icmp_hash */ +static int icmp_multipath_hash_skb(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + return fib_multipath_hash(iph->daddr, iph->saddr); +} + +#else + +#define icmp_multipath_hash_skb(skb) (-1) + +#endif + static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, @@ -461,10 +477,11 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; fl4->fl4_icmp_code = code; - fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev); + fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key_hash(net, fl4, + icmp_multipath_hash_skb(skb_in)); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d38b8b61eaee..6baf36e11808 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -397,7 +397,7 @@ static int igmpv3_sendpack(struct sk_buff *skb) pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); - return ip_local_out(skb); + return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel) @@ -739,7 +739,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, ih->group = group; ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr)); - return ip_local_out(skb); + return ip_local_out(net, skb->sk, skb); } static void igmp_gq_timer_expire(unsigned long data) @@ -2392,11 +2392,11 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, struct ip_sf_socklist *psl; struct net *net = sock_net(sk); + ASSERT_RTNL(); + if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); - imr.imr_multiaddr.s_addr = msf->imsf_multiaddr; imr.imr_address.s_addr = msf->imsf_interface; imr.imr_ifindex = 0; @@ -2417,7 +2417,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, goto done; msf->imsf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); - rtnl_unlock(); if (!psl) { len = 0; count = 0; @@ -2436,7 +2435,6 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, return -EFAULT; return 0; done: - rtnl_unlock(); return err; } @@ -2450,6 +2448,8 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, struct inet_sock *inet = inet_sk(sk); struct ip_sf_socklist *psl; + ASSERT_RTNL(); + psin = (struct sockaddr_in *)&gsf->gf_group; if (psin->sin_family != AF_INET) return -EINVAL; @@ -2457,8 +2457,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!ipv4_is_multicast(addr)) return -EINVAL; - rtnl_lock(); - err = -EADDRNOTAVAIL; for_each_pmc_rtnl(inet, pmc) { @@ -2470,7 +2468,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, goto done; gsf->gf_fmode = pmc->sfmode; psl = rtnl_dereference(pmc->sflist); - rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? 
count : gsf->gf_numsrc; gsf->gf_numsrc = count; @@ -2490,7 +2487,6 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, } return 0; done: - rtnl_unlock(); return err; } @@ -2569,7 +2565,7 @@ void ip_mc_drop_socket(struct sock *sk) } /* called with rcu_read_lock() */ -int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) +int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u8 proto) { struct ip_mc_list *im; struct ip_mc_list __rcu **mc_hash; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 61b45a17fc73..1feb15f23de8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -330,14 +330,12 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) if (error) goto out_err; } - req = reqsk_queue_remove(queue); + req = reqsk_queue_remove(queue, sk); newsk = req->sk; - sk_acceptq_removed(sk); if (sk->sk_protocol == IPPROTO_TCP && - tcp_rsk(req)->tfo_listener && - queue->fastopenq) { - spin_lock_bh(&queue->fastopenq->lock); + tcp_rsk(req)->tfo_listener) { + spin_lock_bh(&queue->fastopenq.lock); if (tcp_rsk(req)->tfo_listener) { /* We are still waiting for the final ACK from 3WHS * so can't free req now. Instead, we set req->sk to @@ -348,7 +346,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) req->sk = NULL; req = NULL; } - spin_unlock_bh(&queue->fastopenq->lock); + spin_unlock_bh(&queue->fastopenq.lock); } out: release_sock(sk); @@ -408,7 +406,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) } EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); -struct dst_entry *inet_csk_route_req(struct sock *sk, +struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, const struct request_sock *req) { @@ -439,7 +437,7 @@ no_route: } EXPORT_SYMBOL_GPL(inet_csk_route_req); -struct dst_entry *inet_csk_route_child_sock(struct sock *sk, +struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, const struct request_sock *req) { @@ -478,65 +476,12 @@ no_route: } EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); -static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, - const u32 rnd, const u32 synq_hsize) -{ - return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); -} - #if IS_ENABLED(CONFIG_IPV6) #define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else #define AF_INET_FAMILY(fam) true #endif -/* Note: this is temporary : - * req sock will no longer be in listener hash table -*/ -struct request_sock *inet_csk_search_req(struct sock *sk, - const __be16 rport, - const __be32 raddr, - const __be32 laddr) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req; - u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries); - - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { - const struct inet_request_sock *ireq = inet_rsk(req); - - if (ireq->ir_rmt_port == rport && - ireq->ir_rmt_addr == raddr && - ireq->ir_loc_addr == laddr && - AF_INET_FAMILY(req->rsk_ops->family)) { - atomic_inc(&req->rsk_refcnt); - WARN_ON(req->sk); - break; - } - } - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - - return req; -} -EXPORT_SYMBOL_GPL(inet_csk_search_req); - -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - unsigned long timeout) 
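The inet_connection_sock.c changes here drop the per-listener SYN table (inet_csk_search_req() and the listen_sock hash walk being removed below) and instead hash embryonic request socks into the regular ehash table, with queue depth tracked by plain atomic counters sized against the listener's backlog. The retransmit-threshold heuristic in reqsk_timer_handler() below is easiest to see in isolation: once the queue is more than half full, fewer SYNACK retransmits are tolerated unless most entries are still young (never retransmitted). A standalone sketch under those assumptions (effective_thresh() is a hypothetical extraction, not a kernel function):

#include <stdio.h>

/* Mirrors the loop in reqsk_timer_handler(): qlen is the current SYN
 * queue length, young the count of never-retransmitted entries, and
 * thresh the default retry limit (sysctl_tcp_synack_retries).
 */
static int effective_thresh(unsigned int qlen, unsigned int young,
			    unsigned int max_backlog, int thresh)
{
	if ((qlen << 1) > (max_backlog > 8 ? max_backlog : 8)) {
		young <<= 1;
		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;	/* each step demands a younger queue */
		}
	}
	return thresh;
}

int main(void)
{
	/* full queue, few young entries: retries decay from 5 to 2 */
	printf("%d\n", effective_thresh(128, 8, 128, 5));
	/* mostly young entries (likely a burst, not an attack): stays 5 */
	printf("%d\n", effective_thresh(128, 100, 128, 5));
	return 0;
}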
-{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr, - inet_rsk(req)->ir_rmt_port, - lopt->hash_rnd, lopt->nr_table_entries); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); - inet_csk_reqsk_queue_added(sk, timeout); -} -EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); - /* Only thing we need from tcp.h */ extern int sysctl_tcp_synack_retries; @@ -563,7 +508,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, req->num_timeout >= rskq_defer_accept - 1; } -int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) +int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) { int err = req->rsk_ops->rtx_syn_ack(parent, req); @@ -573,27 +518,20 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); -/* return true if req was found in the syn_table[] */ +/* return true if req was found in the ehash table */ static bool reqsk_queue_unlink(struct request_sock_queue *queue, struct request_sock *req) { - struct request_sock **prev; - struct listen_sock *lopt; + struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; bool found = false; - spin_lock(&queue->syn_wait_lock); - lopt = queue->listen_opt; - if (lopt) { - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; - prev = &(*prev)->dl_next) { - if (*prev == req) { - *prev = req->dl_next; - found = true; - break; - } - } + if (sk_hashed(req_to_sk(req))) { + spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); + + spin_lock(lock); + found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); + spin_unlock(lock); } - spin_unlock(&queue->syn_wait_lock); if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) reqsk_put(req); return found; @@ -608,21 +546,25 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) } EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); +void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) +{ + inet_csk_reqsk_queue_drop(sk, req); + reqsk_put(req); +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); + static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; struct sock *sk_listener = req->rsk_listener; struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; - struct listen_sock *lopt = queue->listen_opt; int qlen, expire = 0, resend = 0; int max_retries, thresh; u8 defer_accept; - if (sk_listener->sk_state != TCP_LISTEN || !lopt) { - reqsk_put(req); - return; - } + if (sk_listener->sk_state != TCP_LISTEN) + goto drop; max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; thresh = max_retries; @@ -643,9 +585,9 @@ static void reqsk_timer_handler(unsigned long data) * embrions; and abort old ones without pity, if old * ones are about to clog our table. 
*/ - qlen = listen_sock_qlen(lopt); - if (qlen >> (lopt->max_qlen_log - 1)) { - int young = listen_sock_young(lopt) << 1; + qlen = reqsk_queue_len(queue); + if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) { + int young = reqsk_queue_len_young(queue) << 1; while (thresh > 2) { if (qlen < young) @@ -667,41 +609,40 @@ static void reqsk_timer_handler(unsigned long data) unsigned long timeo; if (req->num_timeout++ == 0) - atomic_inc(&lopt->young_dec); + atomic_dec(&queue->young); timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); mod_timer_pinned(&req->rsk_timer, jiffies + timeo); return; } - inet_csk_reqsk_queue_drop(sk_listener, req); - reqsk_put(req); +drop: + inet_csk_reqsk_queue_drop_and_put(sk_listener, req); } -void reqsk_queue_hash_req(struct request_sock_queue *queue, - u32 hash, struct request_sock *req, - unsigned long timeout) +static void reqsk_queue_hash_req(struct request_sock *req, + unsigned long timeout) { - struct listen_sock *lopt = queue->listen_opt; - req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); mod_timer_pinned(&req->rsk_timer, jiffies + timeout); - req->rsk_hash = hash; + inet_ehash_insert(req_to_sk(req), NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ smp_wmb(); - atomic_set(&req->rsk_refcnt, 2); + atomic_set(&req->rsk_refcnt, 2 + 1); +} - spin_lock(&queue->syn_wait_lock); - req->dl_next = lopt->syn_table[hash]; - lopt->syn_table[hash] = req; - spin_unlock(&queue->syn_wait_lock); +void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + unsigned long timeout) +{ + reqsk_queue_hash_req(req, timeout); + inet_csk_reqsk_queue_added(sk); } -EXPORT_SYMBOL(reqsk_queue_hash_req); +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); /** * inet_csk_clone_lock - clone an inet socket, and lock its clone @@ -792,16 +733,14 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); -int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) +int inet_csk_listen_start(struct sock *sk, int backlog) { - struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); + struct inet_sock *inet = inet_sk(sk); - if (rc != 0) - return rc; + reqsk_queue_alloc(&icsk->icsk_accept_queue); - sk->sk_max_ack_backlog = 0; + sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; inet_csk_delack_init(sk); @@ -821,11 +760,76 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) } sk->sk_state = TCP_CLOSE; - __reqsk_queue_destroy(&icsk->icsk_accept_queue); return -EADDRINUSE; } EXPORT_SYMBOL_GPL(inet_csk_listen_start); +static void inet_child_forget(struct sock *sk, struct request_sock *req, + struct sock *child) +{ + sk->sk_prot->disconnect(child, O_NONBLOCK); + + sock_orphan(child); + + percpu_counter_inc(sk->sk_prot->orphan_count); + + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { + BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(sk != req->rsk_listener); + + /* Paranoid, to prevent race condition if + * an inbound pkt destined for child is + * blocked by sock lock in tcp_v4_rcv(). + * Also to satisfy an assertion in + * tcp_v4_destroy_sock(). 
+ */ + tcp_sk(child)->fastopen_rsk = NULL; + } + inet_csk_destroy_sock(child); + reqsk_put(req); +} + +void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, + struct sock *child) +{ + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + + spin_lock(&queue->rskq_lock); + if (unlikely(sk->sk_state != TCP_LISTEN)) { + inet_child_forget(sk, req, child); + } else { + req->sk = child; + req->dl_next = NULL; + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_head = req; + else + queue->rskq_accept_tail->dl_next = req; + queue->rskq_accept_tail = req; + sk_acceptq_added(sk); + } + spin_unlock(&queue->rskq_lock); +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_add); + +struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child, + struct request_sock *req, bool own_req) +{ + if (own_req) { + inet_csk_reqsk_queue_drop(sk, req); + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + inet_csk_reqsk_queue_add(sk, req, child); + /* Warning: caller must not call reqsk_put(req); + * child stole last reference on it. + */ + return child; + } + /* Too bad, another child took ownership of the request, undo. */ + bh_unlock_sock(child); + sock_put(child); + return NULL; +} +EXPORT_SYMBOL(inet_csk_complete_hashdance); + /* * This routine closes sockets which have been at least partially * opened, but not yet accepted. @@ -834,11 +838,7 @@ void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; - struct request_sock *acc_req; - struct request_sock *req; - - /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(queue); + struct request_sock *next, *req; /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -848,57 +848,34 @@ void inet_csk_listen_stop(struct sock *sk) * To be honest, we are not able to make either * of the variants now. --ANK */ - reqsk_queue_destroy(queue); - - while ((req = acc_req) != NULL) { + while ((req = reqsk_queue_remove(queue, sk)) != NULL) { struct sock *child = req->sk; - acc_req = req->dl_next; - local_bh_disable(); bh_lock_sock(child); WARN_ON(sock_owned_by_user(child)); sock_hold(child); - sk->sk_prot->disconnect(child, O_NONBLOCK); - - sock_orphan(child); - - percpu_counter_inc(sk->sk_prot->orphan_count); - - if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { - BUG_ON(tcp_sk(child)->fastopen_rsk != req); - BUG_ON(sk != req->rsk_listener); - - /* Paranoid, to prevent race condition if - * an inbound pkt destined for child is - * blocked by sock lock in tcp_v4_rcv(). - * Also to satisfy an assertion in - * tcp_v4_destroy_sock(). - */ - tcp_sk(child)->fastopen_rsk = NULL; - } - inet_csk_destroy_sock(child); - + inet_child_forget(sk, req, child); bh_unlock_sock(child); local_bh_enable(); sock_put(child); - sk_acceptq_removed(sk); - reqsk_put(req); + cond_resched(); } - if (queue->fastopenq) { + if (queue->fastopenq.rskq_rst_head) { /* Free all the reqs queued in rskq_rst_head. 
*/ - spin_lock_bh(&queue->fastopenq->lock); - acc_req = queue->fastopenq->rskq_rst_head; - queue->fastopenq->rskq_rst_head = NULL; - spin_unlock_bh(&queue->fastopenq->lock); - while ((req = acc_req) != NULL) { - acc_req = req->dl_next; + spin_lock_bh(&queue->fastopenq.lock); + req = queue->fastopenq.rskq_rst_head; + queue->fastopenq.rskq_rst_head = NULL; + spin_unlock_bh(&queue->fastopenq.lock); + while (req != NULL) { + next = req->dl_next; reqsk_put(req); + req = next; } } - WARN_ON(sk->sk_ack_backlog); + WARN_ON_ONCE(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c3b1f3a0f4cf..ab9f8a66615d 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -730,91 +730,21 @@ static void twsk_build_assert(void) #endif } -static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - const struct nlattr *bc) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); - struct inet_diag_entry entry; - int j, s_j, reqnum, s_reqnum; - struct listen_sock *lopt; - int err = 0; - - s_j = cb->args[3]; - s_reqnum = cb->args[4]; - - if (s_j > 0) - s_j--; - - entry.family = sk->sk_family; - - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); - - lopt = icsk->icsk_accept_queue.listen_opt; - if (!lopt || !listen_sock_qlen(lopt)) - goto out; - - if (bc) { - entry.sport = inet->inet_num; - entry.userlocks = sk->sk_userlocks; - } - - for (j = s_j; j < lopt->nr_table_entries; j++) { - struct request_sock *req, *head = lopt->syn_table[j]; - - reqnum = 0; - for (req = head; req; reqnum++, req = req->dl_next) { - struct inet_request_sock *ireq = inet_rsk(req); - - if (reqnum < s_reqnum) - continue; - if (r->id.idiag_dport != ireq->ir_rmt_port && - r->id.idiag_dport) - continue; - - if (bc) { - /* Note: entry.sport and entry.userlocks are already set */ - entry_fill_addrs(&entry, req_to_sk(req)); - entry.dport = ntohs(ireq->ir_rmt_port); - - if (!inet_diag_bc_run(bc, &entry)) - continue; - } - - err = inet_req_diag_fill(req_to_sk(req), skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, cb->nlh); - if (err < 0) { - cb->args[3] = j + 1; - cb->args[4] = reqnum; - goto out; - } - } - - s_reqnum = 0; - } - -out: - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - - return err; -} - void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { struct net *net = sock_net(skb->sk); int i, num, s_i, s_num; + u32 idiag_states = r->idiag_states; + if (idiag_states & TCPF_SYN_RECV) + idiag_states |= TCPF_NEW_SYN_RECV; s_i = cb->args[1]; s_num = num = cb->args[2]; if (cb->args[0] == 0) { - if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) + if (!(idiag_states & TCPF_LISTEN)) goto skip_listen_ht; for (i = s_i; i < INET_LHTABLE_SIZE; i++) { @@ -844,21 +774,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, r->id.idiag_sport) goto next_listen; - if (!(r->idiag_states & TCPF_LISTEN) || - r->id.idiag_dport || + if (r->id.idiag_dport || cb->args[3] > 0) - goto syn_recv; - - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { - spin_unlock_bh(&ilb->lock); - goto done; - } - -syn_recv: - if (!(r->idiag_states & TCPF_SYN_RECV)) goto next_listen; - if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; 
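/* The skb ran out of room: the current bucket and position are
 * recorded in cb->args so the next netlink dump round can resume
 * where this one stopped.
 */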
} @@ -879,7 +799,7 @@ skip_listen_ht: s_i = num = s_num = 0; } - if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) + if (!(idiag_states & ~TCPF_LISTEN)) goto out; for (i = s_i; i <= hashinfo->ehash_mask; i++) { @@ -906,7 +826,7 @@ skip_listen_ht: goto next_normal; state = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_substate : sk->sk_state; - if (!(r->idiag_states & (1 << state))) + if (!(idiag_states & (1 << state))) goto next_normal; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index d0a7c0319e3d..fe144dae7372 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -209,12 +209,6 @@ int inet_frags_init(struct inet_frags *f) } EXPORT_SYMBOL(inet_frags_init); -void inet_frags_init_net(struct netns_frags *nf) -{ - init_frag_mem_limit(nf); -} -EXPORT_SYMBOL(inet_frags_init_net); - void inet_frags_fini(struct inet_frags *f) { cancel_work_sync(&f->frags_work); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 89120196a949..ccc5980797fc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -126,7 +126,7 @@ void inet_put_port(struct sock *sk) } EXPORT_SYMBOL(inet_put_port); -int __inet_inherit_port(struct sock *sk, struct sock *child) +int __inet_inherit_port(const struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; unsigned short port = inet_sk(child)->inet_num; @@ -137,6 +137,10 @@ int __inet_inherit_port(struct sock *sk, struct sock *child) spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; + if (unlikely(!tb)) { + spin_unlock(&head->lock); + return -ENOENT; + } if (tb->port != port) { /* NOTE: using tproxy and redirecting skbs to a proxy * on a different listener port breaks the assumption @@ -185,6 +189,8 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; } return score; } @@ -398,14 +404,18 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -void __inet_hash_nolisten(struct sock *sk, struct sock *osk) +/* insert a socket into ehash, and possibly remove another one + * (the other one can be a SYN_RECV or TIMEWAIT socket) + */ +bool inet_ehash_insert(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; struct inet_ehash_bucket *head; spinlock_t *lock; + bool ret = true; - WARN_ON(!sk_unhashed(sk)); + WARN_ON_ONCE(!sk_unhashed(sk)); sk->sk_hash = sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); @@ -413,24 +423,41 @@ void __inet_hash_nolisten(struct sock *sk, struct sock *osk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock(lock); - __sk_nulls_add_node_rcu(sk, list); if (osk) { - WARN_ON(sk->sk_hash != osk->sk_hash); - sk_nulls_del_node_init_rcu(osk); + WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); + ret = sk_nulls_del_node_init_rcu(osk); } + if (ret) + __sk_nulls_add_node_rcu(sk, list); spin_unlock(lock); - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + return ret; +} + +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) +{ + bool ok = inet_ehash_insert(sk, osk); + + if (ok) { + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); + } else { + percpu_counter_inc(sk->sk_prot->orphan_count); + sk->sk_state = TCP_CLOSE; + sock_set_flag(sk, SOCK_DEAD); + inet_csk_destroy_sock(sk); + } + return ok; } -EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+EXPORT_SYMBOL_GPL(inet_ehash_nolisten); void __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct inet_listen_hashbucket *ilb; - if (sk->sk_state != TCP_LISTEN) - return __inet_hash_nolisten(sk, osk); - + if (sk->sk_state != TCP_LISTEN) { + inet_ehash_nolisten(sk, osk); + return; + } WARN_ON(!sk_unhashed(sk)); ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; @@ -551,7 +578,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - __inet_hash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw); } if (tw) inet_twsk_bind_unhash(tw, hinfo); @@ -568,7 +595,7 @@ ok: tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - __inet_hash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL); spin_unlock_bh(&head->lock); return 0; } else { diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 2d3aa408fbdc..da0d7ce85844 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -61,18 +61,18 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) } -static int ip_forward_finish(struct sock *sk, struct sk_buff *skb) +static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); skb_sender_cpu_clear(skb); - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } int ip_forward(struct sk_buff *skb) @@ -81,6 +81,7 @@ int ip_forward(struct sk_buff *skb) struct iphdr *iph; /* Our header */ struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); + struct net *net; /* that should never happen */ if (skb->pkt_type != PACKET_HOST) @@ -99,6 +100,7 @@ int ip_forward(struct sk_buff *skb) return NET_RX_SUCCESS; skb_forward_csum(skb); + net = dev_net(skb->dev); /* * According to the RFC, we must first decrease the TTL field. If @@ -119,7 +121,7 @@ int ip_forward(struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_FORWARDED; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { - IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto drop; @@ -143,8 +145,9 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, - skb->dev, rt->dst.dev, ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, + net, NULL, skb, skb->dev, rt->dst.dev, + ip_forward_finish); sr_failed: /* @@ -155,7 +158,7 @@ sr_failed: too_many_hops: /* Tell the sender its packet died... 
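 * via an ICMP Time Exceeded message with code "TTL exceeded in
 * transit", as RFC 1812 requires of a router that decrements the
 * TTL to zero.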
*/ - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); drop: kfree_skb(skb); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fa7f15305f9a..1fe55ae81781 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -48,7 +48,7 @@ #include <linux/inet.h> #include <linux/netfilter_ipv4.h> #include <net/inet_ecn.h> -#include <net/vrf.h> +#include <net/l3mdev.h> /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6 * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c @@ -78,7 +78,7 @@ struct ipq { u8 ecn; /* RFC3168 support */ u16 max_df_size; /* largest frag with DF set seen */ int iif; - int vif; /* VRF device index */ + int vif; /* L3 master device index */ unsigned int rid; struct inet_peer *peer; }; @@ -654,11 +654,10 @@ out_fail: } /* Process an incoming IP datagram fragment. */ -int ip_defrag(struct sk_buff *skb, u32 user) +int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct net_device *dev = skb->dev ? : skb_dst(skb)->dev; - int vif = vrf_master_ifindex_rcu(dev); - struct net *net = dev_net(dev); + int vif = l3mdev_master_ifindex_rcu(dev); struct ipq *qp; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); @@ -683,7 +682,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) } EXPORT_SYMBOL(ip_defrag); -struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) +struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user) { struct iphdr iph; int netoff; @@ -712,7 +711,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) if (pskb_trim_rcsum(skb, netoff + len)) return skb; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (ip_defrag(skb, user)) + if (ip_defrag(net, skb, user)) return NULL; skb_clear_hash(skb); } @@ -840,6 +839,8 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { + int res; + /* Fragment cache limits. 
* * The fragment memory accounting code, (tries to) account for @@ -863,9 +864,13 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; - inet_frags_init_net(&net->ipv4.frags); - - return ip4_frags_ns_ctl_register(net); + res = inet_frags_init_net(&net->ipv4.frags); + if (res) + return res; + res = ip4_frags_ns_ctl_register(net); + if (res) + inet_frags_uninit_net(&net->ipv4.frags); + return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f4fc8a77aaa7..b1209b63381f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -157,6 +157,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) u8 protocol = ip_hdr(skb)->protocol; struct sock *last = NULL; struct net_device *dev = skb->dev; + struct net *net = dev_net(dev); for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; @@ -167,9 +168,9 @@ bool ip_call_ra_chain(struct sk_buff *skb) if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dev->ifindex) && - net_eq(sock_net(sk), dev_net(dev))) { + net_eq(sock_net(sk), net)) { if (ip_is_fragment(ip_hdr(skb))) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) + if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) return true; } if (last) { @@ -188,10 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) +static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); - __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); @@ -248,14 +247,15 @@ int ip_local_deliver(struct sk_buff *skb) /* * Reassemble IP fragments. */ + struct net *net = dev_net(skb->dev); if (ip_is_fragment(ip_hdr(skb))) { - if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) + if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, + net, NULL, skb, skb->dev, NULL, ip_local_deliver_finish); } @@ -311,7 +311,7 @@ drop: int sysctl_ip_early_demux __read_mostly = 1; EXPORT_SYMBOL(sysctl_ip_early_demux); -static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) +static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; @@ -337,8 +337,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) iph->tos, skb->dev); if (unlikely(err)) { if (err == -EXDEV) - NET_INC_STATS_BH(dev_net(skb->dev), - LINUX_MIB_IPRPFILTER); + NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER); goto drop; } } @@ -359,11 +358,9 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) rt = skb_rtable(skb); if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, - skb->len); + IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, - skb->len); + IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len); return dst_input(skb); @@ -378,6 +375,7 @@ drop: int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { const struct iphdr *iph; + struct net *net; u32 len; /* When the interface is in promisc. 
mode, drop all the crap @@ -387,11 +385,12 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; - IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); + net = dev_net(dev); + IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len); skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); goto out; } @@ -417,7 +416,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); - IP_ADD_STATS_BH(dev_net(dev), + IP_ADD_STATS_BH(net, IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); @@ -431,7 +430,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -441,7 +440,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS); goto drop; } @@ -453,14 +452,14 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, - dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, ip_rcv_finish); csum_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS); inhdr_error: - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0138fada0951..4233cbe47052 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,9 +83,10 @@ int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; EXPORT_SYMBOL(sysctl_ip_default_ttl); -static int ip_fragment(struct sock *sk, struct sk_buff *skb, - unsigned int mtu, - int (*output)(struct sock *, struct sk_buff *)); +static int +ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + unsigned int mtu, + int (*output)(struct net *, struct sock *, struct sk_buff *)); /* Generate a checksum for an outgoing IP datagram. 
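 * The header checksum is the 16-bit ones' complement of the
 * ones' complement sum of the header words; the function body is
 * simply:
 *
 *	iph->check = 0;
 *	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);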
*/ void ip_send_check(struct iphdr *iph) @@ -95,32 +96,28 @@ void ip_send_check(struct iphdr *iph) } EXPORT_SYMBOL(ip_send_check); -static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) +int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL, - skb_dst(skb)->dev, dst_output_sk); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst(skb)->dev, + dst_output); } -int __ip_local_out(struct sk_buff *skb) -{ - return __ip_local_out_sk(skb->sk, skb); -} - -int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) +int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; - err = __ip_local_out(skb); + err = __ip_local_out(net, sk, skb); if (likely(err == 1)) - err = dst_output_sk(sk, skb); + err = dst_output(net, sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip_local_out_sk); +EXPORT_SYMBOL_GPL(ip_local_out); static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) { @@ -135,11 +132,12 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * Add an ip header to a skbuff and send it out. * */ -int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, +int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = skb_rtable(skb); + struct net *net = sock_net(sk); struct iphdr *iph; /* Build the IP header. */ @@ -149,15 +147,17 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; - if (ip_dont_fragment(sk, &rt->dst)) - iph->frag_off = htons(IP_DF); - else - iph->frag_off = 0; iph->ttl = ip_select_ttl(inet, &rt->dst); iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(sock_net(sk), skb, sk); + if (ip_dont_fragment(sk, &rt->dst)) { + iph->frag_off = htons(IP_DF); + iph->id = 0; + } else { + iph->frag_off = 0; + __ip_select_ident(net, iph, 1); + } if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -168,11 +168,11 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, skb->mark = sk->sk_mark; /* Send it out. */ - return ip_local_out(skb); + return ip_local_out(net, skb->sk, skb); } EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); -static int ip_finish_output2(struct sock *sk, struct sk_buff *skb) +static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; @@ -182,9 +182,9 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb) u32 nexthop; if (rt->rt_type == RTN_MULTICAST) { - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); } else if (rt->rt_type == RTN_BROADCAST) - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); /* Be paranoid, rather than too clever. 
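 * If the device needs a link-layer header and this skb lacks the
 * headroom for one, expand the head now rather than let a driver
 * write in front of the buffer.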
*/ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -220,8 +220,8 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb) return -EINVAL; } -static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, - unsigned int mtu) +static int ip_finish_output_gso(struct net *net, struct sock *sk, + struct sk_buff *skb, unsigned int mtu) { netdev_features_t features; struct sk_buff *segs; @@ -230,7 +230,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, /* common case: locally created skb or seglen is <= mtu */ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || skb_gso_network_seglen(skb) <= mtu) - return ip_finish_output2(sk, skb); + return ip_finish_output2(net, sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. * @@ -253,7 +253,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, int err; segs->next = NULL; - err = ip_fragment(sk, segs, mtu, ip_finish_output2); + err = ip_fragment(net, sk, segs, mtu, ip_finish_output2); if (err && ret == 0) ret = err; @@ -263,7 +263,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb, return ret; } -static int ip_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; @@ -271,20 +271,20 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } #endif mtu = ip_skb_dst_mtu(skb); if (skb_is_gso(skb)) - return ip_finish_output_gso(sk, skb, mtu); + return ip_finish_output_gso(net, sk, skb, mtu); if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU)) - return ip_fragment(sk, skb, mtu, ip_finish_output2); + return ip_fragment(net, sk, skb, mtu, ip_finish_output2); - return ip_finish_output2(sk, skb); + return ip_finish_output2(net, sk, skb); } -int ip_mc_output(struct sock *sk, struct sk_buff *skb) +int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct net_device *dev = rt->dst.dev; @@ -292,7 +292,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) /* * If the indicated interface is up and running, send the packet. 
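 * Multicast (and broadcast) frames may additionally be looped
 * back to local listeners: a clone of the skb is passed through
 * dev_loopback_xmit() before the real transmission below.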
*/ - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); @@ -320,7 +320,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } @@ -335,26 +335,28 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb, - NULL, newskb->dev, dev_loopback_xmit); + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, newskb, NULL, newskb->dev, + dev_loopback_xmit); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, - skb->dev, ip_finish_output, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb->dev, + ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_output(struct sock *sk, struct sk_buff *skb) +int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; - IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len); + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -377,6 +379,7 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) { struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); struct ip_options_rcu *inet_opt; struct flowi4 *fl4; struct rtable *rt; @@ -407,7 +410,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(sock_net(sk), fl4, sk, + rt = ip_route_output_ports(net, fl4, sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, @@ -444,20 +447,20 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_segs(sock_net(sk), skb, sk, + ip_select_ident_segs(net, skb, sk, skb_shinfo(skb)->gso_segs ?: 1); /* TODO : should we use skb->sk here instead of sk ? 
*/ skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - res = ip_local_out(skb); + res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; no_route: rcu_read_unlock(); - IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EHOSTUNREACH; } @@ -486,29 +489,26 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -static int ip_fragment(struct sock *sk, struct sk_buff *skb, +static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu, - int (*output)(struct sock *, struct sk_buff *)) + int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct iphdr *iph = ip_hdr(skb); if ((iph->frag_off & htons(IP_DF)) == 0) - return ip_do_fragment(sk, skb, output); + return ip_do_fragment(net, sk, skb, output); if (unlikely(!skb->ignore_df || (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size > mtu))) { - struct rtable *rt = skb_rtable(skb); - struct net_device *dev = rt->dst.dev; - - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); kfree_skb(skb); return -EMSGSIZE; } - return ip_do_fragment(sk, skb, output); + return ip_do_fragment(net, sk, skb, output); } /* @@ -518,8 +518,8 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb, * single device frame, and queue such a frame for sending. */ -int ip_do_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct iphdr *iph; int ptr; @@ -533,6 +533,11 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, dev = rt->dst.dev; + /* for offloaded checksums cleanup checksum before fragmentation */ + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto fail; + /* * Point into the IP datagram header. 
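 * Every fragment reuses this header with tot_len, frag_off and
 * the checksum recomputed; only the first fragment keeps the full
 * option set, later ones carry just the copy-on-fragment options.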
*/ @@ -621,10 +626,10 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, ip_send_check(iph); } - err = output(sk, skb); + err = output(net, sk, skb); if (!err) - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -634,7 +639,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, } if (err == 0) { - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); return 0; } @@ -643,7 +648,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb, kfree_skb(frag); frag = skb; } - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; slow_path_clean: @@ -657,9 +662,6 @@ slow_path_clean: } slow_path: - /* for offloaded checksums cleanup checksum before fragmentation */ - if ((skb->ip_summed == CHECKSUM_PARTIAL) && skb_checksum_help(skb)) - goto fail; iph = ip_hdr(skb); left = skb->len - hlen; /* Space per frame */ @@ -761,19 +763,19 @@ slow_path: ip_send_check(iph); - err = output(sk, skb2); + err = output(net, sk, skb2); if (err) goto fail; - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); + IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES); } consume_skb(skb); - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS); return err; fail: kfree_skb(skb); - IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err; } EXPORT_SYMBOL(ip_do_fragment); @@ -911,6 +913,7 @@ static int __ip_append_data(struct sock *sk, if (transhdrlen && length + fragheaderlen <= mtu && rt->dst.dev->features & NETIF_F_V4_CSUM && + !(flags & MSG_MORE) && !exthdrlen) csummode = CHECKSUM_PARTIAL; @@ -1434,7 +1437,7 @@ int ip_send_skb(struct net *net, struct sk_buff *skb) { int err; - err = ip_local_out(skb); + err = ip_local_out(net, skb->sk, skb); if (err) { if (err > 0) err = net_xmit_errno(err); @@ -1561,7 +1564,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_vrf(net, skb->skb_iif)) + if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; flowi4_init_output(&fl4, oif, @@ -1596,7 +1599,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb)); ip_push_pending_frames(sk, &fl4); } out: diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c3c359ad66e3..5f73a7c03e27 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1251,11 +1251,22 @@ EXPORT_SYMBOL(compat_ip_setsockopt); * the _received_ ones. The set sets the _sent_ ones. 
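 * (IP_MSFILTER and MCAST_MSFILTER reads walk multicast state that
 * is protected by RTNL, so for those two options the helper below
 * has do_ip_getsockopt nest the locks as
 *
 *	rtnl_lock();
 *	lock_sock(sk);
 *	...
 *	release_sock(sk);
 *	rtnl_unlock();
 *
 * keeping RTNL outside the socket lock, the usual kernel order.)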
*/ +static bool getsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IP_MSFILTER: + case MCAST_MSFILTER: + return true; + } + return false; +} + static int do_ip_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); - int val; + bool needs_rtnl = getsockopt_needs_rtnl(optname); + int val, err = 0; int len; if (level != SOL_IP) @@ -1269,6 +1280,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (len < 0) return -EINVAL; + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -1386,39 +1399,35 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_MSFILTER: { struct ip_msfilter msf; - int err; if (len < IP_MSFILTER_SIZE(0)) { - release_sock(sk); - return -EINVAL; + err = -EINVAL; + goto out; } if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) { - release_sock(sk); - return -EFAULT; + err = -EFAULT; + goto out; } err = ip_mc_msfget(sk, &msf, (struct ip_msfilter __user *)optval, optlen); - release_sock(sk); - return err; + goto out; } case MCAST_MSFILTER: { struct group_filter gsf; - int err; if (len < GROUP_FILTER_SIZE(0)) { - release_sock(sk); - return -EINVAL; + err = -EINVAL; + goto out; } if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) { - release_sock(sk); - return -EFAULT; + err = -EFAULT; + goto out; } err = ip_mc_gsfget(sk, &gsf, (struct group_filter __user *)optval, optlen); - release_sock(sk); - return err; + goto out; } case IP_MULTICAST_ALL: val = inet->mc_all; @@ -1485,6 +1494,12 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; } return 0; + +out: + release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); + return err; } int ip_getsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 84dce6a92f93..6cb9009c3d96 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -53,6 +53,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __u8 tos, __u8 ttl, __be16 df, bool xnet) { int pkt_len = skb->len - skb_inner_network_offset(skb); + struct net *net = dev_net(rt->dst.dev); struct iphdr *iph; int err; @@ -76,10 +77,9 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(dev_net(rt->dst.dev), iph, - skb_shinfo(skb)->gso_segs ?: 1); + __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1); - err = ip_local_out_sk(sk, skb); + err = ip_local_out(net, sk, skb); if (unlikely(net_xmit_eval(err))) pkt_len = 0; return pkt_len; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0c152087ca15..4d8f0b698777 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -197,7 +197,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; - err = dst_output(skb); + err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) err = skb->len; iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index ed4ef09c2136..0bc7412d9e14 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -146,6 +146,10 @@ u8 root_server_path[256] = { 0, }; /* Path to mount as root */ /* vendor class identifier */ static char vendor_class_identifier[253] __initdata; +#if defined(CONFIG_IP_PNP_DHCP) +static char dhcp_client_identifier[253] __initdata; +#endif + /* 
Persistent data: */ static int ic_proto_used; /* Protocol used, if any */ @@ -728,6 +732,16 @@ ic_dhcp_init_options(u8 *options) memcpy(e, vendor_class_identifier, len); e += len; } + len = strlen(dhcp_client_identifier + 1); + /* the minimum length of the identifier is 2, including the + * 1-byte type, and it cannot be larger than the remaining + * option space + */ + if (len >= 1 && len < 312 - (e - options) - 1) { + *e++ = 61; + *e++ = len + 1; + memcpy(e, dhcp_client_identifier, len + 1); + e += len + 1; + } } *e++ = 255; /* End of the list */ @@ -1557,8 +1571,24 @@ static int __init ic_proto_name(char *name) return 0; } #ifdef CONFIG_IP_PNP_DHCP - else if (!strcmp(name, "dhcp")) { + else if (!strncmp(name, "dhcp", 4)) { + char *client_id; + ic_proto_enabled &= ~IC_RARP; + client_id = strstr(name, "dhcp,"); + if (client_id) { + char *v; + + client_id = client_id + 5; + v = strchr(client_id, ','); + if (!v) + return 1; + *v = 0; + if (kstrtou8(client_id, 0, dhcp_client_identifier)) + DBG("DHCP: Invalid client identifier type\n"); + strncpy(dhcp_client_identifier + 1, v + 1, 251); + *v = ','; + } return 1; } #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 866ee89f5254..92dd4b74d513 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1678,17 +1678,18 @@ static void ip_encap(struct net *net, struct sk_buff *skb, nf_reset(skb); } -static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ipmr_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } /* @@ -1745,7 +1746,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * to blackhole. */ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } @@ -1787,8 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets.
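 * Each selected output vif typically gets its own clone of the
 * packet, and every clone traverses the NF_INET_FORWARD hook
 * below independently.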
*/ - NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, - skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dev, ipmr_forward_finish); return; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 61eafc9b4545..c3776ff6749f 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -17,9 +17,8 @@ #include <net/netfilter/nf_queue.h> /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type) +int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; @@ -104,7 +103,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb, } } -static int nf_ip_reroute(struct sk_buff *skb, +static int nf_ip_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -116,7 +115,7 @@ static int nf_ip_reroute(struct sk_buff *skb, skb->mark == rt_info->mark && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(skb, RTN_UNSPEC); + return ip_route_me_harder(net, skb, RTN_UNSPEC); } return 0; } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 8f87fc38ccde..11dccba474b7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -186,7 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", indev, arpinfo->iniface, - arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":""); + arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : ""); return 0; } @@ -195,7 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, arpinfo->outiface, - arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":""); + arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : ""); return 0; } @@ -247,10 +247,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) } unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; @@ -285,6 +285,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, */ e = get_entry(table_base, private->hook_entry[hook]); + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.hooknum = hook; @@ -467,7 +468,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } - next: +next: duprintf("Finished chain %u\n", hook); } return 1; @@ -631,7 +632,7 @@ static inline void cleanup_entry(struct arpt_entry *e) * newinfo). 
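 * On success every rule has been bounds-checked, chain loops have
 * been ruled out and each target validated, so the packet path
 * can walk the table without re-checking offsets.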
*/ static int translate_table(struct xt_table_info *newinfo, void *entry0, - const struct arpt_replace *repl) + const struct arpt_replace *repl) { struct arpt_entry *iter; unsigned int i; @@ -891,7 +892,7 @@ static int compat_table_info(const struct xt_table_info *info, #endif static int get_info(struct net *net, void __user *user, - const int *len, int compat) + const int *len, int compat) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1068,7 +1069,7 @@ static int __do_replace(struct net *net, const char *name, } static int do_replace(struct net *net, const void __user *user, - unsigned int len) + unsigned int len) { int ret; struct arpt_replace tmp; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 93876d03120c..1897ee160920 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -27,13 +27,10 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int -arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +arptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? state->in : state->out); - - return arpt_do_table(skb, ops->hooknum, state, - net->ipv4.arptable_filter); + return arpt_do_table(skb, state, state->net->ipv4.arptable_filter); } static struct nf_hook_ops *arpfilter_ops __read_mostly; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index b0a86e73451c..b99affad6ba1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -102,7 +102,7 @@ ip_packet_match(const struct iphdr *ip, if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ipinfo->iniface, - ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); + ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : ""); return false; } @@ -111,7 +111,7 @@ ip_packet_match(const struct iphdr *ip, if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ipinfo->outiface, - ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); + ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : ""); return false; } @@ -120,7 +120,7 @@ ip_packet_match(const struct iphdr *ip, FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { dprintf("Packet protocol %hi does not match %hi.%s\n", ip->protocol, ipinfo->proto, - ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); + ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : ""); return false; } @@ -246,7 +246,8 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, return 0; } -static void trace_packet(const struct sk_buff *skb, +static void trace_packet(struct net *net, + const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -258,7 +259,6 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ipt_entry *iter; unsigned int rulenum = 0; - struct net *net = dev_net(in ? in : out); root = get_entry(private->entries, private->hook_entry[hook]); @@ -285,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. 
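 * (NF_DROP, NF_QUEUE and NF_STOLEN are the other possibilities;
 * XT_CONTINUE and XT_RETURN never escape, they are consumed while
 * the rule list is walked.)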
*/ unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ @@ -315,6 +315,7 @@ ipt_do_table(struct sk_buff *skb, acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; acpar.thoff = ip_hdrlen(skb); acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV4; @@ -378,8 +379,8 @@ ipt_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, state->in, state->out, - table->name, private, e); + trace_packet(state->net, skb, hook, state->in, + state->out, table->name, private, e); #endif /* Standard target? */ if (!t->u.kernel.target->target) { @@ -430,8 +431,8 @@ ipt_do_table(struct sk_buff *skb, } while (!acpar.hotdrop); pr_debug("Exiting %s; sp at %u\n", __func__, stackidx); - xt_write_recseq_end(addend); - local_bh_enable(); + xt_write_recseq_end(addend); + local_bh_enable(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -483,7 +484,7 @@ mark_source_chains(const struct xt_table_info *newinfo, unsigned int oldpos, size; if ((strcmp(t->target.u.user.name, - XT_STANDARD_TARGET) == 0) && + XT_STANDARD_TARGET) == 0) && t->verdict < -NF_MAX_VERDICT - 1) { duprintf("mark_source_chains: bad " "negative verdict (%i)\n", @@ -548,7 +549,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } - next: +next: duprintf("Finished chain %u\n", hook); } return 1; @@ -803,7 +804,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, - const struct ipt_replace *repl) + const struct ipt_replace *repl) { struct ipt_entry *iter; unsigned int i; @@ -1077,7 +1078,7 @@ static int compat_table_info(const struct xt_table_info *info, #endif static int get_info(struct net *net, void __user *user, - const int *len, int compat) + const int *len, int compat) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1303,7 +1304,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len) static int do_add_counters(struct net *net, const void __user *user, - unsigned int len, int compat) + unsigned int len, int compat) { unsigned int i; struct xt_counters_info tmp; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 45cb16a6a4a3..4a9e6db9df8d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -492,14 +492,14 @@ static void arp_print(struct arp_payload *payload) { #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; - int j,k; + int j, k; - for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { + for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) { hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); - hbuffer[k++]=':'; + hbuffer[k++] = ':'; } - hbuffer[--k]='\0'; + hbuffer[--k] = '\0'; pr_debug("src %pI4@%s, dst %pI4\n", &payload->src_ip, hbuffer, &payload->dst_ip); @@ -507,14 +507,14 @@ static void arp_print(struct arp_payload *payload) #endif static unsigned int -arp_mangle(const struct nf_hook_ops *ops, +arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct arphdr *arp = arp_hdr(skb); struct 
arp_payload *payload; struct clusterip_config *c; - struct net *net = dev_net(state->in ? state->in : state->out); + struct net *net = state->net; /* we don't care about non-ethernet and non-ipv4 ARP */ if (arp->ar_hrd != htons(ARPHRD_ETHER) || diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 87907d4bd259..1d16c0f28df0 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par) nf_send_unreach(skb, ICMP_PKT_FILTERED, hook); break; case IPT_TCP_RESET: - nf_send_reset(skb, hook); + nf_send_reset(par->net, skb, hook); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 95ea633e8356..5fdc556514ba 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -39,11 +39,14 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) } static void -synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, +synproxy_send_tcp(const struct synproxy_net *snet, + const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { + struct net *net = nf_ct_net(snet->tmpl); + nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; @@ -51,7 +54,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { @@ -60,7 +63,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, nf_conntrack_get(nfct); } - ip_local_out(nskb); + ip_local_out(net, nskb->sk, nskb); return; free_nskb: @@ -68,7 +71,8 @@ free_nskb: } static void -synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, +synproxy_send_client_synack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; @@ -104,7 +108,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } @@ -148,7 +152,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } @@ -188,7 +192,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void @@ -226,8 +230,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); - synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); } static bool @@ -258,7 +262,7 @@ static unsigned int synproxy_tg4(struct sk_buff *skb, const struct 
xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_net *snet = synproxy_pernet(par->net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -287,7 +291,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) XT_SYNPROXY_OPT_SACK_PERM | XT_SYNPROXY_OPT_ECN); - synproxy_send_client_synack(skb, th, &opts); + synproxy_send_client_synack(snet, skb, th, &opts); return NF_DROP; } else if (th->ack && !(th->fin || th->rst || th->syn)) { @@ -299,11 +303,11 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); + struct synproxy_net *snet = synproxy_pernet(nhs->net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -433,14 +437,12 @@ static struct xt_target synproxy_tg4_reg __read_mostly = { static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { { .hook = ipv4_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv4_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 14a2aa8b8a14..a787d07f6cb7 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -25,7 +25,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) bool r; pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, spi, max); - r=(spi >= min && spi <= max) ^ invert; + r = (spi >= min && spi <= max) ^ invert; pr_debug(" result %s\n", r ? 
"PASS" : "FAILED"); return r; } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index c4ffc9de1654..78cc64eddfc1 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -32,12 +32,11 @@ static __be32 rpfilter_get_saddr(__be32 addr) return addr; } -static bool rpfilter_lookup_reverse(struct flowi4 *fl4, +static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, const struct net_device *dev, u8 flags) { struct fib_result res; bool dev_match; - struct net *net = dev_net(dev); int ret __maybe_unused; if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) @@ -96,7 +95,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; - return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index a0f3beca52d2..397ef2dd133e 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -33,19 +33,16 @@ static const struct xt_table packet_filter = { }; static unsigned int -iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net; - - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter); + return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 62cbb8c5f4a8..ba5d392a13c4 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -39,7 +39,6 @@ static const struct xt_table packet_mangler = { static unsigned int ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { - struct net_device *out = state->out; unsigned int ret; const struct iphdr *iph; u_int8_t tos; @@ -59,8 +58,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state, - dev_net(out)->ipv4.iptable_mangle); + ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN) { iph = ip_hdr(skb); @@ -69,7 +67,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) iph->daddr != daddr || skb->mark != mark || iph->tos != tos) { - err = ip_route_me_harder(skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } @@ -80,18 +78,17 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. 
*/ static unsigned int -iptable_mangle_hook(const struct nf_hook_ops *ops, +iptable_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ipt_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ipt_do_table(skb, ops->hooknum, state, - dev_net(state->out)->ipv4.iptable_mangle); + if (state->hook == NF_INET_POST_ROUTING) + return ipt_do_table(skb, state, + state->net->ipv4.iptable_mangle); /* PREROUTING/INPUT/FORWARD: */ - return ipt_do_table(skb, ops->hooknum, state, - dev_net(state->in)->ipv4.iptable_mangle); + return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 0d4d9cdf98a4..ae2cd2752046 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -28,49 +28,46 @@ static const struct xt_table nf_nat_ipv4_table = { .af = NFPROTO_IPV4, }; -static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - - return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table); + return ipt_do_table(skb, state, state->net->ipv4.nat_table); } -static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain); } -static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int iptable_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = iptable_nat_ipv4_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, @@ -78,7 +75,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = iptable_nat_ipv4_out, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC, @@ -86,7 +82,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = iptable_nat_ipv4_local_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST, @@ -94,7 +89,6 @@ static struct 
nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = iptable_nat_ipv4_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 0356e6da4bb7..1ba02811acb0 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -20,19 +20,16 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net; - - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* root is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net(state->in ? state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw); + return ipt_do_table(skb, state, state->net->ipv4.iptable_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 4bce3980ccd9..c2e23d5e9cd4 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -37,20 +37,16 @@ static const struct xt_table security_table = { }; static unsigned int -iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +iptable_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net; - - if (ops->hooknum == NF_INET_LOCAL_OUT && + if (state->hook == NF_INET_LOCAL_OUT && (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr))) /* Somebody is playing with raw sockets. */ return NF_ACCEPT; - net = dev_net(state->in ? 
state->in : state->out); - return ipt_do_table(skb, ops->hooknum, state, - net->ipv4.iptable_security); + return ipt_do_table(skb, state, state->net->ipv4.iptable_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; @@ -83,7 +79,7 @@ static int __init iptable_security_init(void) int ret; ret = register_pernet_subsys(&iptable_security_net_ops); - if (ret < 0) + if (ret < 0) return ret; sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8a2caaf3940b..461ca926fd39 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_helper(const struct nf_hook_ops *ops, +static unsigned int ipv4_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -119,7 +119,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops, ct, ctinfo); } -static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv4_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -143,14 +143,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } -static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -158,7 +158,7 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, if (skb->len < sizeof(struct iphdr) || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); } /* Connection tracking may drop packets, but never alters them, so @@ -166,42 +166,36 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { .hook = ipv4_conntrack_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK, }, { .hook = ipv4_conntrack_local, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK, }, { .hook = ipv4_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_HELPER, }, { .hook = ipv4_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, { .hook = ipv4_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_HELPER, }, { .hook = ipv4_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM, diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index cdde3ec496e9..c567e1b5d799 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -30,7 +30,7 @@ static inline struct nf_icmp_net 
*icmp_pernet(struct net *net) } static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmphdr *hp; struct icmphdr _hdr; @@ -144,7 +144,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb) + ip_hdrlen(skb) + sizeof(struct icmphdr), - PF_INET, &origtuple)) { + PF_INET, net, &origtuple)) { pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 9306ec4fab41..6fb869f646bf 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -22,14 +22,15 @@ #endif #include <net/netfilter/nf_conntrack_zones.h> -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, + u_int32_t user) { int err; skb_orphan(skb); local_bh_disable(); - err = ip_defrag(skb, user); + err = ip_defrag(net, skb, user); local_bh_enable(); if (!err) { @@ -61,15 +62,14 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, return IP_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv4_conntrack_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(skb->sk); - if (sk && (sk->sk_family == PF_INET) && - inet->nodefrag) + if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) && + inet_sk(sk)->nodefrag) return NF_ACCEPT; #if IS_ENABLED(CONFIG_NF_CONNTRACK) @@ -83,9 +83,9 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, /* Gather fragments. 
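
The conntrack defrag hook above now refuses to dereference `inet_sk()` on anything but a full socket: with the lockless-listener work, `skb->sk` may point at a request or timewait mini-socket that shares only the common `struct sock` header, so fields like `nodefrag` are garbage there. A kernel-style fragment spelling out the guarded check; the wrapper name is invented, while `sk_fullsock()` and `inet_sk()` are the standard helpers:

#include <net/sock.h>
#include <net/inet_sock.h>

/* Hypothetical wrapper around the new test: gate the inet_sk()
 * dereference on sk_fullsock() so request/timewait mini-sockets
 * never have their non-existent nodefrag field read.
 */
static bool sock_wants_nodefrag(const struct sock *sk)
{
	return sk && sk_fullsock(sk) &&
	       sk->sk_family == PF_INET &&
	       inet_sk(sk)->nodefrag;
}
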
*/ if (ip_is_fragment(ip_hdr(skb))) { enum ip_defrag_users user = - nf_ct_defrag_user(ops->hooknum, skb); + nf_ct_defrag_user(state->hook, skb); - if (nf_ct_ipv4_gather_frags(skb, user)) + if (nf_ct_ipv4_gather_frags(state->net, skb, user)) return NF_STOLEN; } return NF_ACCEPT; @@ -94,14 +94,12 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, static struct nf_hook_ops ipv4_defrag_ops[] = { { .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_CONNTRACK_DEFRAG, diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index 2d79e6e8d934..ceb187308120 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -23,25 +23,10 @@ #include <net/netfilter/nf_conntrack.h> #endif -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, - int oif) +static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb, + const struct in_addr *gw, int oif) { const struct iphdr *iph = ip_hdr(skb); - struct net *net = pick_net(skb); struct rtable *rt; struct flowi4 fl4; @@ -65,7 +50,7 @@ static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw, return true; } -void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in_addr *gw, int oif) { struct iphdr *iph; @@ -105,9 +90,9 @@ void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum, --iph->ttl; ip_send_check(iph); - if (nf_dup_ipv4_route(skb, gw, oif)) { + if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); - ip_local_out(skb); + ip_local_out(net, skb->sk, skb); __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index 22f4579b0c2a..5075b7ecd26d 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -255,9 +255,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); unsigned int -nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -266,7 +266,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; /* maniptype == SRC for postrouting. */ - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); /* We never see fragments: conntrack defrags on pre-routing * and local-out, and nf_nat_out protects post-routing. 
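
`nf_nat_ipv4_fn()` derives the NAT manipulation type from the hook with `HOOK2MANIP(state->hook)`. A user-space paraphrase of that mapping, with toy enums mirroring the kernel's hook ordering, just to make the SRC/DST split explicit:

#include <assert.h>
#include <stdio.h>

/* SNAT happens where packets leave conntrack's view of the host
 * (POST_ROUTING, and LOCAL_IN for locally delivered replies), DNAT
 * where they enter (PRE_ROUTING, LOCAL_OUT). Enum order mirrors the
 * kernel's NF_INET_* values.
 */
enum hook { PRE_ROUTING, LOCAL_IN, FORWARD, LOCAL_OUT, POST_ROUTING };
enum manip { MANIP_SRC, MANIP_DST };

static enum manip hook2manip(enum hook h)
{
	return (h != POST_ROUTING && h != LOCAL_IN) ? MANIP_DST : MANIP_SRC;
}

int main(void)
{
	assert(hook2manip(PRE_ROUTING) == MANIP_DST);
	assert(hook2manip(LOCAL_OUT) == MANIP_DST);
	assert(hook2manip(POST_ROUTING) == MANIP_SRC);
	assert(hook2manip(LOCAL_IN) == MANIP_SRC);
	puts("hook->manip mapping ok");
	return 0;
}
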
@@ -295,7 +295,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, case IP_CT_RELATED_REPLY: if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - ops->hooknum)) + state->hook)) return NF_DROP; else return NF_ACCEPT; @@ -308,21 +308,21 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; - if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) break; - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + ret = nf_nat_alloc_null_binding(ct, state->hook); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } @@ -332,11 +332,11 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nf_nat_packet(ct, ctinfo, state->hook, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -345,9 +345,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); unsigned int -nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -355,7 +355,7 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && daddr != ip_hdr(skb)->daddr) skb_dst_drop(skb); @@ -365,9 +365,9 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); unsigned int -nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -384,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && @@ -396,7 +396,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET); + err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } @@ -407,9 +407,9 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 
EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); unsigned int -nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -424,14 +424,14 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_ipv4_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv4_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - err = ip_route_me_harder(skb, RTN_UNSPEC); + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); if (err < 0) ret = NF_DROP_ERR(err); } @@ -440,7 +440,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET); + err = nf_xfrm_me_harder(state->net, skb, AF_INET); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 7c676671329d..ddb894ac1458 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1156,7 +1156,7 @@ static int snmp_parse_mangle(unsigned char *msg, } if (obj->type == SNMP_IPADDR) - mangle_address(ctx.begin, ctx.pointer - 4 , map, check); + mangle_address(ctx.begin, ctx.pointer - 4, map, check); kfree(obj->id); kfree(obj); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 3262e41ff76f..c747b2d9eb77 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -99,7 +99,7 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); /* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) { struct sk_buff *nskb; const struct iphdr *oiph; @@ -129,7 +129,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) ip4_dst_hoplimit(skb_dst(nskb))); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - if (ip_route_me_harder(nskb, RTN_UNSPEC)) + if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; /* "Never happens" */ @@ -157,7 +157,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) dev_queue_xmit(nskb); } else #endif - ip_local_out(nskb); + ip_local_out(net, nskb->sk, nskb); return; diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index 8412268bbad1..9d09d4f59545 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -15,15 +15,15 @@ #include <net/netfilter/nf_tables.h> static unsigned int -nft_do_chain_arp(const struct nf_hook_ops *ops, +nft_do_chain_arp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_arp __read_mostly = { diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c 
b/net/ipv4/netfilter/nf_tables_ipv4.c index aa180d3a69a5..ca9dc3c46c4f 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -18,18 +18,18 @@ #include <net/ip.h> #include <net/netfilter/nf_tables_ipv4.h> -static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv4_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -41,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv4(ops, skb, state); + return nft_do_chain_ipv4(priv, skb, state); } struct nft_af_info nft_af_ipv4 __read_mostly = { diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c index bf5c30ae14e4..f5c66a7a4bf2 100644 --- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -26,44 +26,44 @@ #include <net/netfilter/nf_nat_l3proto.h> #include <net/ip.h> -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv4 = { diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c index e335b0afdaf3..2375b0a8be46 100644 --- a/net/ipv4/netfilter/nft_chain_route_ipv4.c +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -21,7 +21,7 @@ #include <net/route.h> #include <net/ip.h> -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -37,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct 
nf_hook_ops *ops, ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - nft_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_set_pktinfo_ipv4(&pkt, skb, state); mark = skb->mark; iph = ip_hdr(skb); @@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, daddr = iph->daddr; tos = iph->tos; - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE) { iph = ip_hdr(skb); @@ -53,7 +53,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, iph->daddr != daddr || skb->mark != mark || iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) + if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) ret = NF_DROP; } return ret; diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index b45932d43b69..bf855e64fc45 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr, }; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif); + nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif); } static int nft_dup_ipv4_init(const struct nft_ctx *ctx, diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 40e414c4ca56..b72ffc58e255 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -26,7 +26,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr, memset(&range, 0, sizeof(range)); range.flags = priv->flags; - regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook, &range, pkt->out); } diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index d8d795df9c13..c09d4381427e 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -36,7 +36,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr, mr.range[0].flags |= priv->flags; regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, - pkt->ops->hooknum); + pkt->hook); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index b07e58b51158..c24f41c816b3 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -27,11 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nf_send_reset(pkt->net, pkt->skb, pkt->hook); break; default: break; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 561cd4b8fc6e..8c0d0bdc2a7c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -411,8 +411,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, - NULL, rt->dst.dev, dst_output_sk); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, rt->dst.dev, + dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -483,6 +484,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd, static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); 
+ struct net *net = sock_net(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; @@ -542,7 +544,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); + err = ip_cmsg_send(net, msg, &ipc, false); if (err) goto out; if (ipc.opt) @@ -597,6 +599,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); + if (!saddr && ipc.oif) + l3mdev_get_saddr(net, ipc.oif, &fl4); + if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; @@ -607,7 +612,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); - rt = ip_route_output_flow(sock_net(sk), &fl4, sk); + rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c81deb85acb4..85f184e429c6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -112,7 +112,7 @@ #endif #include <net/secure_seq.h> #include <net/ip_tunnels.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #define RT_FL_TOS(oldflp4) \ ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) @@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) return; } log_martians = IN_DEV_LOG_MARTIANS(in_dev); - vif = vrf_master_ifindex_rcu(rt->dst.dev); + vif = l3mdev_master_ifindex_rcu(rt->dst.dev); rcu_read_unlock(); net = dev_net(rt->dst.dev); @@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb) } peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - vrf_master_ifindex(skb->dev), 1); + l3mdev_master_ifindex(skb->dev), 1); send = true; if (peer) { @@ -1152,7 +1152,7 @@ static void ipv4_link_failure(struct sk_buff *skb) dst_set_expires(&rt->dst, 0); } -static int ip_rt_bug(struct sock *sk, struct sk_buff *skb) +static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, @@ -1438,12 +1438,34 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, } static struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, bool nopolicy, bool noxfrm, bool will_cache) { - return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, - (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | - (nopolicy ? DST_NOPOLICY : 0) | - (noxfrm ? DST_NOXFRM : 0)); + struct rtable *rt; + + rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, + (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) | + (nopolicy ? DST_NOPOLICY : 0) | + (noxfrm ? 
DST_NOXFRM : 0)); + + if (rt) { + rt->rt_genid = rt_genid_ipv4(dev_net(dev)); + rt->rt_flags = flags; + rt->rt_type = type; + rt->rt_is_input = 0; + rt->rt_iif = 0; + rt->rt_pmtu = 0; + rt->rt_gateway = 0; + rt->rt_uses_gateway = 0; + rt->rt_table_id = 0; + INIT_LIST_HEAD(&rt->rt_uncached); + + rt->dst.output = ip_output; + if (flags & RTCF_LOCAL) + rt->dst.input = ip_local_deliver; + } + + return rt; } /* called in rcu_read_lock() section */ @@ -1452,6 +1474,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct rtable *rth; struct in_device *in_dev = __in_dev_get_rcu(dev); + unsigned int flags = RTCF_MULTICAST; u32 itag = 0; int err; @@ -1464,9 +1487,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb->protocol != htons(ETH_P_IP)) goto e_inval; - if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(saddr)) - goto e_inval; + if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) + goto e_inval; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) @@ -1477,7 +1499,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (err < 0) goto e_err; } - rth = rt_dst_alloc(dev_net(dev)->loopback_dev, + if (our) + flags |= RTCF_LOCAL; + + rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) goto e_nobufs; @@ -1486,20 +1511,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->dst.tclassid = itag; #endif rth->dst.output = ip_rt_bug; - - rth->rt_genid = rt_genid_ipv4(dev_net(dev)); - rth->rt_flags = RTCF_MULTICAST; - rth->rt_type = RTN_MULTICAST; rth->rt_is_input= 1; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); - if (our) { - rth->dst.input= ip_local_deliver; - rth->rt_flags |= RTCF_LOCAL; - } #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) @@ -1608,7 +1620,7 @@ static int __mkroute_input(struct sk_buff *skb, } } - rth = rt_dst_alloc(out_dev->dev, + rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache); if (!rth) { @@ -1616,19 +1628,12 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev)); - rth->rt_flags = 0; - rth->rt_type = res->type; rth->rt_is_input = 1; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); + if (res->table) + rth->rt_table_id = res->table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; - rth->dst.output = ip_output; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); if (lwtunnel_output_redirect(rth->dst.lwtstate)) { @@ -1646,6 +1651,48 @@ out: return err; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* To make ICMP packets follow the right flow, the multipath hash is + * calculated from the inner IP addresses in reverse order. 
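
The comment above is the whole trick: an ICMP error embeds the offending packet's header, so hashing the inner addresses in reverse order makes the error hash like a reply belonging to the original flow, instead of like an unrelated packet from the reporting router. A toy user-space demonstration; `mp_hash()` is a stand-in for the kernel's seeded `fib_multipath_hash()`, and the addresses are arbitrary examples:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy mixer, only the argument order matters for the point made. */
static uint32_t mp_hash(uint32_t saddr, uint32_t daddr)
{
	uint32_t h = saddr * 0x9e3779b1u;

	return h ^ (daddr + 0x85ebca6bu + (h << 6) + (h >> 2));
}

int main(void)
{
	uint32_t a = 0x0a000001u;	/* 10.0.0.1, original sender */
	uint32_t b = 0xc0a80001u;	/* 192.168.0.1, destination */
	uint32_t rtr = 0xac100001u;	/* 172.16.0.1, reporting router */

	/* An a->b packet tripped an error; the router sends ICMP
	 * rtr->a embedding the offending header (saddr=a, daddr=b).
	 */
	uint32_t ret_traffic = mp_hash(b, a);	/* normal b->a packets */
	uint32_t icmp_fixed = mp_hash(b, a);	/* inner daddr, inner saddr */
	uint32_t icmp_naive = mp_hash(rtr, a);	/* outer header of the error */

	assert(icmp_fixed == ret_traffic);
	printf("fixed %08x matches return path %08x; naive %08x need not\n",
	       icmp_fixed, ret_traffic, icmp_naive);
	return 0;
}
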
+ */ +static int ip_multipath_icmp_hash(struct sk_buff *skb) +{ + const struct iphdr *outer_iph = ip_hdr(skb); + struct icmphdr _icmph; + const struct icmphdr *icmph; + struct iphdr _inner_iph; + const struct iphdr *inner_iph; + + if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) + goto standard_hash; + + icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), + &_icmph); + if (!icmph) + goto standard_hash; + + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_REDIRECT && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB) { + goto standard_hash; + } + + inner_iph = skb_header_pointer(skb, + outer_iph->ihl * 4 + sizeof(_icmph), + sizeof(_inner_iph), &_inner_iph); + if (!inner_iph) + goto standard_hash; + + return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); + +standard_hash: + return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); +} + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, const struct flowi4 *fl4, @@ -1653,8 +1700,15 @@ static int ip_mkroute_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res->fi && res->fi->fib_nhs > 1) - fib_select_multipath(res); + if (res->fi && res->fi->fib_nhs > 1) { + int h; + + if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) + h = ip_multipath_icmp_hash(skb); + else + h = fib_multipath_hash(saddr, daddr); + fib_select_multipath(res, h); + } #endif /* create a routing cache entry */ @@ -1706,6 +1760,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_source; res.fi = NULL; + res.table = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; @@ -1733,7 +1788,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? 
: dev->ifindex; + fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -1754,7 +1809,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &itag); if (err < 0) - goto martian_source_keep_err; + goto martian_source; goto local_input; } @@ -1776,7 +1831,7 @@ brd_input: err = fib_validate_source(skb, saddr, 0, tos, 0, dev, in_dev, &itag); if (err < 0) - goto martian_source_keep_err; + goto martian_source; } flags |= RTCF_BROADCAST; res.type = RTN_BROADCAST; @@ -1796,26 +1851,18 @@ local_input: } } - rth = rt_dst_alloc(net->loopback_dev, + rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache); if (!rth) goto e_nobufs; - rth->dst.input= ip_local_deliver; rth->dst.output= ip_rt_bug; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif - - rth->rt_genid = rt_genid_ipv4(net); - rth->rt_flags = flags|RTCF_LOCAL; - rth->rt_type = res.type; rth->rt_is_input = 1; - rth->rt_iif = 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); + if (res.table) + rth->rt_table_id = res.table->tb_id; RT_CACHE_STAT_INC(in_slow_tot); if (res.type == RTN_UNREACHABLE) { @@ -1837,6 +1884,7 @@ no_route: RT_CACHE_STAT_INC(in_no_route); res.type = RTN_UNREACHABLE; res.fi = NULL; + res.table = NULL; goto local_input; /* @@ -1859,8 +1907,6 @@ e_nobufs: goto out; martian_source: - err = -EINVAL; -martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); goto out; } @@ -1988,28 +2034,19 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } add: - rth = rt_dst_alloc(dev_out, + rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), do_cache); if (!rth) return ERR_PTR(-ENOBUFS); - rth->dst.output = ip_output; - - rth->rt_genid = rt_genid_ipv4(dev_net(dev_out)); - rth->rt_flags = flags; - rth->rt_type = type; - rth->rt_is_input = 0; rth->rt_iif = orig_oif ? : 0; - rth->rt_pmtu = 0; - rth->rt_gateway = 0; - rth->rt_uses_gateway = 0; - INIT_LIST_HEAD(&rth->rt_uncached); + if (res->table) + rth->rt_table_id = res->table->tb_id; + RT_CACHE_STAT_INC(out_slow_tot); - if (flags & RTCF_LOCAL) - rth->dst.input = ip_local_deliver; if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { @@ -2038,7 +2075,8 @@ add: * Major route resolver routine. */ -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, + int mp_hash) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2137,11 +2175,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - if (netif_is_vrf(dev_out) && - !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) { - rth = vrf_dev_get_rth(dev_out); + + rth = l3mdev_get_rtable(dev_out, fl4); + if (rth) goto out; - } } if (!fl4->daddr) { @@ -2159,7 +2196,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif) { + if (fl4->flowi4_oif && + !netif_index_is_l3_master(net, fl4->flowi4_oif)) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. 
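
A side note on the `rt_dst_alloc()` reshuffle running through the routing hunks here: the allocator now receives `flags` and `type` up front, initializes every common field and the default `dst.output`/`dst.input` handlers, and the mkroute paths only override what differs. A toy rendering of that consolidation pattern with invented types (`toy_rt` and friends are not kernel structures):

#include <stdio.h>
#include <stdlib.h>

struct toy_rt {
	unsigned int flags;
	unsigned int type;
	const char *output;	/* stands in for the output handler */
	const char *input;
};

/* One place owns the defaults; call sites can no longer drift out
 * of sync by forgetting a field.
 */
static struct toy_rt *toy_rt_alloc(unsigned int flags, unsigned int type)
{
	struct toy_rt *rt = calloc(1, sizeof(*rt));

	if (rt) {
		rt->flags = flags;
		rt->type = type;
		rt->output = "ip_output";	/* default for everyone */
		if (flags & 0x1 /* toy RTCF_LOCAL */)
			rt->input = "ip_local_deliver";
	}
	return rt;
}

int main(void)
{
	struct toy_rt *rt = toy_rt_alloc(0x1, 2);

	if (rt)
		printf("local route: input=%s output=%s\n",
		       rt->input, rt->output);
	free(rt);
	return 0;
}
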
@@ -2201,18 +2239,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) goto make_route; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) - fib_select_multipath(&res); - else -#endif - if (!res.prefixlen && - res.table->tb_num_default > 1 && - res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(fl4, &res); - - if (!fl4->saddr) - fl4->saddr = FIB_RES_PREFSRC(net, res); + fib_select_path(net, &res, fl4, mp_hash); dev_out = FIB_RES_DEV(res); fl4->flowi4_oif = dev_out->ifindex; @@ -2225,7 +2252,7 @@ out: rcu_read_unlock(); return rth; } -EXPORT_SYMBOL_GPL(__ip_route_output_key); +EXPORT_SYMBOL_GPL(__ip_route_output_key_hash); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) { @@ -2277,7 +2304,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->__use = 1; new->input = dst_discard; - new->output = dst_discard_sk; + new->output = dst_discard_out; new->dev = ort->dst.dev; if (new->dev) @@ -2303,7 +2330,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or } struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, - struct sock *sk) + const struct sock *sk) { struct rtable *rt = __ip_route_output_key(net, flp4); @@ -2319,7 +2346,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); -static int rt_fill_info(struct net *net, __be32 dst, __be32 src, +static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { @@ -2339,8 +2366,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = fl4->flowi4_tos; - r->rtm_table = RT_TABLE_MAIN; - if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN)) + r->rtm_table = table_id; + if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; @@ -2445,6 +2472,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) int err; int mark; struct sk_buff *skb; + u32 table_id = RT_TABLE_MAIN; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); if (err < 0) @@ -2480,6 +2508,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; + if (netif_index_is_l3_master(net, fl4.flowi4_oif)) + fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; + if (iif) { struct net_device *dev; @@ -2514,7 +2545,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(net, dst, src, &fl4, skb, + if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) + table_id = rt->rt_table_id; + + err = rt_fill_info(net, dst, src, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err < 0) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d70b1f603692..4cbe9f0a4281 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -192,15 +192,11 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, } EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence); -__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb, - __u16 *mssp) +__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct iphdr *iph = ip_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); - tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return __cookie_v4_init_sequence(iph, th, mssp); } @@ -225,10 +221,13 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, { struct inet_connection_sock *icsk = inet_csk(sk); struct sock *child; + bool own_req; - child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); + child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst, + NULL, &own_req); if (child) { atomic_set(&req->rsk_refcnt, 1); + sock_rps_save_rxhash(child, skb); inet_csk_reqsk_queue_add(sk, req, child); } else { reqsk_free(req); @@ -288,6 +287,10 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt, } EXPORT_SYMBOL(cookie_ecn_ok); +/* On input, sk is a listener. + * Output is listener if incoming packet would not create a child + * NULL if memory could not be allocated. + */ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; @@ -326,7 +329,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */ + req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */ if (!req) goto out; @@ -345,7 +348,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->snt_synack.v64 = 0; treq->tfo_listener = false; ireq->ir_iif = sk->sk_bound_dev_if; @@ -381,10 +384,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) } /* Try to redo what tcp_v4_send_synack did. */ - req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + req->rsk_window_clamp = tp->window_clamp ? 
:dst_metric(&rt->dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, - &req->rcv_wnd, &req->window_clamp, + &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 894da3a70aff..a0bd7a55193e 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,14 +48,14 @@ static void set_local_port_range(struct net *net, int range[2]) { bool same_parity = !((range[0] ^ range[1]) & 1); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); if (same_parity && !net->ipv4.ip_local_ports.warned) { net->ipv4.ip_local_ports.warned = true; pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); } net->ipv4.ip_local_ports.range[0] = range[0]; net->ipv4.ip_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -496,6 +496,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_recovery", + .data = &sysctl_tcp_recovery, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "tcp_reordering", .data = &sysctl_tcp_reordering, .maxlen = sizeof(int), @@ -577,6 +584,13 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_min_rtt_wlen", + .data = &sysctl_tcp_min_rtt_wlen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b8b8fa184f75..0cfa7c0c1e80 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -388,6 +388,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); + tp->rtt_min[0].rtt = ~0U; /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -900,7 +901,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && !tcp_passive_fastopen(sk)) { - if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) + err = sk_stream_wait_connect(sk, &timeo); + if (err != 0) goto out_err; } @@ -967,7 +969,8 @@ new_segment: copied += copy; offset += copy; - if (!(size -= copy)) { + size -= copy; + if (!size) { tcp_tx_timestamp(sk, skb); goto out; } @@ -988,7 +991,8 @@ wait_for_memory: tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + err = sk_stream_wait_memory(sk, &timeo); + if (err != 0) goto do_error; mss_now = tcp_send_mss(sk, &size_goal, flags); @@ -1111,7 +1115,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) */ if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && !tcp_passive_fastopen(sk)) { - if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) + err = sk_stream_wait_connect(sk, &timeo); + if (err != 0) goto do_error; } @@ -1267,7 +1272,8 @@ wait_for_memory: tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + err = sk_stream_wait_memory(sk, &timeo); + if (err != 0) goto do_error; mss_now = tcp_send_mss(sk, &size_goal, flags); @@ 
-1767,7 +1773,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* __ Restore normal policy in scheduler __ */ - if ((chunk = len - tp->ucopy.len) != 0) { + chunk = len - tp->ucopy.len; + if (chunk != 0) { NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); len -= chunk; copied += chunk; @@ -1778,7 +1785,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, do_prequeue: tcp_prequeue_process(sk); - if ((chunk = len - tp->ucopy.len) != 0) { + chunk = len - tp->ucopy.len; + if (chunk != 0) { NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); len -= chunk; copied += chunk; @@ -2230,7 +2238,8 @@ int tcp_disconnect(struct sock *sk, int flags) sk->sk_shutdown = 0; sock_reset_flag(sk, SOCK_DONE); tp->srtt_us = 0; - if ((tp->write_seq += tp->max_window + 2) == 0) + tp->write_seq += tp->max_window + 2; + if (tp->write_seq == 0) tp->write_seq = 1; icsk->icsk_backoff = 0; tp->snd_cwnd = 2; @@ -2253,13 +2262,6 @@ int tcp_disconnect(struct sock *sk, int flags) } EXPORT_SYMBOL(tcp_disconnect); -void tcp_sock_destruct(struct sock *sk) -{ - inet_sock_destruct(sk); - - kfree(inet_csk(sk)->icsk_accept_queue.fastopenq); -} - static inline bool tcp_can_repair_sock(const struct sock *sk) { return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && @@ -2581,7 +2583,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, TCPF_LISTEN))) { tcp_fastopen_init_key_once(true); - err = fastopen_init_queue(sk, val); + fastopen_queue_tune(sk, val); } else { err = -EINVAL; } @@ -2849,10 +2851,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_FASTOPEN: - if (icsk->icsk_accept_queue.fastopenq) - val = icsk->icsk_accept_queue.fastopenq->max_qlen; - else - val = 0; + val = icsk->icsk_accept_queue.fastopenq.max_qlen; break; case TCP_TIMESTAMP: diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 93c4dc3ab23f..882caa4e72bc 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -173,6 +173,10 @@ out: */ if (ca->get_info) memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); } void tcp_init_congestion_control(struct sock *sk) @@ -181,6 +185,10 @@ void tcp_init_congestion_control(struct sock *sk) if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); + if (tcp_ca_needs_ecn(sk)) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); } static void tcp_reinit_congestion_control(struct sock *sk, @@ -192,8 +200,8 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_ops = ca; icsk->icsk_ca_setsockopt = 1; - if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init) - icsk->icsk_ca_ops->init(sk); + if (sk->sk_state != TCP_CLOSE) + tcp_init_congestion_control(sk); } /* Manage refcounts on socket close. 
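
In the `tcp_cong.c` hunks above, both the assign and init paths now force the ECN transmit state to match the congestion control's `TCP_CONG_NEEDS_ECN` flag, and `tcp_reinit_congestion_control()` funnels through `tcp_init_congestion_control()` so a CA switch picks this up as well. A toy user-space model of that behavior; the structs are invented and only the flag's intent mirrors the kernel:

#include <stdbool.h>
#include <stdio.h>

#define TOY_CONG_NEEDS_ECN 0x2	/* mirrors TCP_CONG_NEEDS_ECN's intent */

struct toy_ca {
	const char *name;
	unsigned int flags;
};

static bool ecn_xmit;	/* stands in for INET_ECN_xmit()/dontxmit() state */

/* As in the patch: (re)initializing the CA also decides whether
 * outgoing segments carry ECT.
 */
static void toy_init_congestion_control(const struct toy_ca *ca)
{
	ecn_xmit = ca->flags & TOY_CONG_NEEDS_ECN;
}

int main(void)
{
	const struct toy_ca reno = { "reno", 0 };
	const struct toy_ca dctcp = { "dctcp", TOY_CONG_NEEDS_ECN };

	toy_init_congestion_control(&dctcp);
	printf("%s: ECT on transmit: %d\n", dctcp.name, ecn_xmit);
	toy_init_congestion_control(&reno);
	printf("%s: ECT on transmit: %d\n", reno.name, ecn_xmit);
	return 0;
}
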
*/ diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f9c0fb84e435..55be6ac70cff 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -124,27 +124,29 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, return false; } -static bool tcp_fastopen_create_child(struct sock *sk, - struct sk_buff *skb, - struct dst_entry *dst, - struct request_sock *req) +static struct sock *tcp_fastopen_create_child(struct sock *sk, + struct sk_buff *skb, + struct dst_entry *dst, + struct request_sock *req) { struct tcp_sock *tp; struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; struct sock *child; u32 end_seq; + bool own_req; req->num_retrans = 0; req->num_timeout = 0; req->sk = NULL; - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + NULL, &own_req); if (!child) - return false; + return NULL; - spin_lock(&queue->fastopenq->lock); - queue->fastopenq->qlen++; - spin_unlock(&queue->fastopenq->lock); + spin_lock(&queue->fastopenq.lock); + queue->fastopenq.qlen++; + spin_unlock(&queue->fastopenq.lock); /* Initialize the child socket. Have to fix some values to take * into account the child is a Fast Open socket and is created @@ -161,15 +163,13 @@ static bool tcp_fastopen_create_child(struct sock *sk, tp->snd_wnd = ntohs(tcp_hdr(skb)->window); /* Activate the retrans timer so that SYNACK can be retransmitted. - * The request socket is not added to the SYN table of the parent + * The request socket is not added to the ehash * because it's been added to the accept queue directly. */ inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, TCP_TIMEOUT_INIT, TCP_RTO_MAX); - atomic_set(&req->rsk_refcnt, 1); - /* Add the child socket directly into the accept queue */ - inet_csk_reqsk_queue_add(sk, req, child); + atomic_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ inet_csk(child)->icsk_af_ops->rebuild_header(child); @@ -178,12 +178,10 @@ static bool tcp_fastopen_create_child(struct sock *sk, tcp_init_metrics(child); tcp_init_buffer_space(child); - /* Queue the data carried in the SYN packet. We need to first - * bump skb's refcnt because the caller will attempt to free it. - * Note that IPv6 might also have used skb_get() trick - * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts) - * So we need to eventually get a clone of the packet, - * before inserting it in sk_receive_queue. + /* Queue the data carried in the SYN packet. + * We used to play tricky games with skb_get(). + * With lockless listener, it is a dead end. + * Do not think about it. * * XXX (TFO) - we honor a zero-payload TFO request for now, * (any reason not to?) but no need to queue the skb since @@ -191,12 +189,7 @@ static bool tcp_fastopen_create_child(struct sock *sk, */ end_seq = TCP_SKB_CB(skb)->end_seq; if (end_seq != TCP_SKB_CB(skb)->seq + 1) { - struct sk_buff *skb2; - - if (unlikely(skb_shared(skb))) - skb2 = skb_clone(skb, GFP_ATOMIC); - else - skb2 = skb_get(skb); + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (likely(skb2)) { skb_dst_drop(skb2); @@ -214,11 +207,10 @@ static bool tcp_fastopen_create_child(struct sock *sk, } } tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq; - sk->sk_data_ready(sk); - bh_unlock_sock(child); - sock_put(child); - WARN_ON(!req->sk); - return true; + /* tcp_conn_request() is sending the SYNACK, + * and queues the child into listener accept queue. 
+ */ + return child; } static bool tcp_fastopen_queue_check(struct sock *sk) @@ -235,8 +227,8 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * between qlen overflow causing Fast Open to be disabled * temporarily vs a server not supporting Fast Open at all. */ - fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq; - if (!fastopenq || fastopenq->max_qlen == 0) + fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq; + if (fastopenq->max_qlen == 0) return false; if (fastopenq->qlen >= fastopenq->max_qlen) { @@ -261,13 +253,14 @@ static bool tcp_fastopen_queue_check(struct sock *sk) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). */ -bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct tcp_fastopen_cookie *foc, - struct dst_entry *dst) +struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct tcp_fastopen_cookie *foc, + struct dst_entry *dst) { struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + struct sock *child; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); @@ -276,7 +269,7 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; - return false; + return NULL; } if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) @@ -296,11 +289,12 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, * data in SYN_RECV state. */ fastopen: - if (tcp_fastopen_create_child(sk, skb, dst, req)) { + child = tcp_fastopen_create_child(sk, skb, dst, req); + if (child) { foc->len = -1; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE); - return true; + return child; } NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } else if (foc->len > 0) /* Client presents an invalid cookie */ @@ -308,6 +302,5 @@ fastopen: valid_foc.exp = foc->exp; *foc = valid_foc; - return false; + return NULL; } -EXPORT_SYMBOL(tcp_try_fastopen); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8f515bb19c4..fdd88c3803a6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -95,6 +95,7 @@ int sysctl_tcp_stdurg __read_mostly; int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; int sysctl_tcp_frto __read_mostly = 2; +int sysctl_tcp_min_rtt_wlen __read_mostly = 300; int sysctl_tcp_thin_dupack __read_mostly; @@ -880,6 +881,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric, if (metric > 0) tcp_disable_early_retrans(tp); + tp->rack.reord = 1; } /* This must be called before lost_out is incremented */ @@ -905,8 +907,7 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } -static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, - struct sk_buff *skb) +void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); @@ -1047,70 +1048,6 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack, return !before(start_seq, end_seq - tp->max_window); } -/* Check for lost retransmit. This superb idea is borrowed from "ratehalving". - * Event "B". Later note: FACK people cheated me again 8), we have to account - * for reordering! Ugly, but should help. 
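
The `fastopenq` hunks read more simply once the representation change is noticed: the queue is now embedded in the listener's `request_sock_queue` rather than lazily allocated, so the NULL tests and the allocation-failure path vanish, and a zeroed queue already means TFO off because `max_qlen == 0`. A toy model with invented types:

#include <stdio.h>

struct fastopen_queue { int qlen, max_qlen; };
struct accept_queue { struct fastopen_queue fastopenq; };

/* No "!q->fastopenq" test needed anymore: the queue always exists. */
static int fastopen_queue_has_room(const struct accept_queue *q)
{
	return q->fastopenq.max_qlen != 0 &&
	       q->fastopenq.qlen < q->fastopenq.max_qlen;
}

int main(void)
{
	struct accept_queue q = { { 0, 0 } };

	printf("untuned listener accepts TFO: %d\n",
	       fastopen_queue_has_room(&q));
	q.fastopenq.max_qlen = 16;	/* fastopen_queue_tune(sk, 16) analogue */
	printf("after tuning:               %d\n",
	       fastopen_queue_has_room(&q));
	return 0;
}
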
- * - * Search retransmitted skbs from write_queue that were sent when snd_nxt was - * less than what is now known to be received by the other end (derived from - * highest SACK block). Also calculate the lowest snd_nxt among the remaining - * retransmitted skbs to avoid some costly processing per ACKs. - */ -static void tcp_mark_lost_retrans(struct sock *sk, int *flag) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int cnt = 0; - u32 new_low_seq = tp->snd_nxt; - u32 received_upto = tcp_highest_sack_seq(tp); - - if (!tcp_is_fack(tp) || !tp->retrans_out || - !after(received_upto, tp->lost_retrans_low) || - icsk->icsk_ca_state != TCP_CA_Recovery) - return; - - tcp_for_write_queue(skb, sk) { - u32 ack_seq = TCP_SKB_CB(skb)->ack_seq; - - if (skb == tcp_send_head(sk)) - break; - if (cnt == tp->retrans_out) - break; - if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - continue; - - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) - continue; - - /* TODO: We would like to get rid of tcp_is_fack(tp) only - * constraint here (see above) but figuring out that at - * least tp->reordering SACK blocks reside between ack_seq - * and received_upto is not easy task to do cheaply with - * the available datastructures. - * - * Whether FACK should check here for tp->reordering segs - * in-between one could argue for either way (it would be - * rather simple to implement as we could count fack_count - * during the walk and do tp->fackets_out - fack_count). - */ - if (after(received_upto, ack_seq)) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - *flag |= FLAG_LOST_RETRANS; - tcp_skb_mark_lost_uncond_verify(tp, skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); - } else { - if (before(ack_seq, new_low_seq)) - new_low_seq = ack_seq; - cnt += tcp_skb_pcount(skb); - } - } - - if (tp->retrans_out) - tp->lost_retrans_low = new_low_seq; -} - static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) @@ -1236,6 +1173,8 @@ static u8 tcp_sacktag_one(struct sock *sk, return sacked; if (!(sacked & TCPCB_SACKED_ACKED)) { + tcp_rack_advance(tp, xmit_time, sacked); + if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing @@ -1837,7 +1776,6 @@ advance_sp: ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); - tcp_mark_lost_retrans(sk, &state->flag); tcp_verify_left_out(tp); out: @@ -2314,14 +2252,29 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) tp->snd_cwnd_stamp = tcp_time_stamp; } +static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when) +{ + return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && + before(tp->rx_opt.rcv_tsecr, when); +} + +/* skb is spurious retransmitted if the returned timestamp echo + * reply is prior to the skb transmission time + */ +static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, + const struct sk_buff *skb) +{ + return (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) && + tcp_tsopt_ecr_before(tp, tcp_skb_timestamp(skb)); +} + /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. 
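
The new `tcp_tsopt_ecr_before()`/`tcp_skb_spurious_retrans()` helpers above encode a classic observation: if the peer echoes a timestamp older than a retransmission's send time, the echo must acknowledge the original transmission, so the retransmit was spurious. A self-contained paraphrase; `ts_before()` mirrors the kernel's wrap-safe `before()` comparison:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Timestamps are 32-bit and wrap, so "earlier" is serial-number
 * arithmetic, exactly like the kernel's before().
 */
static bool ts_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;
}

static bool spurious_retrans(uint32_t rcv_tsecr, uint32_t skb_xmit_ts)
{
	return rcv_tsecr && ts_before(rcv_tsecr, skb_xmit_ts);
}

int main(void)
{
	/* Echo predates the retransmission: the original got through. */
	assert(spurious_retrans(1000, 1500));
	/* Echo at/after the retransmission: genuine loss recovery. */
	assert(!spurious_retrans(1500, 1000));
	return 0;
}
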
 */ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { return !tp->retrans_stamp || - (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && - before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp)); + tcp_tsopt_ecr_before(tp, tp->retrans_stamp); } /* Undo procedures. */ @@ -2853,6 +2806,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, } } + /* Use RACK to detect loss */ + if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && + tcp_rack_mark_lost(sk)) + flag |= FLAG_LOST_RETRANS; + /* E. Process state. */ switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: @@ -2915,8 +2873,69 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, tcp_xmit_retransmit_queue(sk); } +/* Kathleen Nichols' algorithm for tracking the minimum value of + * a data stream over some fixed time interval. (E.g., the minimum + * RTT over the past five minutes.) It uses constant space and constant + * time per update yet almost always delivers the same minimum as an + * implementation that has to keep all the data in the window. + * + * The algorithm keeps track of the best, 2nd best & 3rd best min + * values, maintaining an invariant that the measurement time of the + * n'th best >= n-1'th best. It also makes sure that the three values + * are widely separated in the time window since that bounds the worst + * case error when the data is monotonically increasing over the window. + * + * Upon getting a new min, we can forget everything earlier because it + * has no value - the new min is <= everything else in the window by + * definition and it's the most recent. So we restart fresh on every new min + * and overwrite 2nd & 3rd choices. The same property holds for 2nd & 3rd + * best. + */ +static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) +{ + const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; + struct rtt_meas *m = tcp_sk(sk)->rtt_min; + struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now }; + u32 elapsed; + + /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ + if (unlikely(rttm.rtt <= m[0].rtt)) + m[0] = m[1] = m[2] = rttm; + else if (rttm.rtt <= m[1].rtt) + m[1] = m[2] = rttm; + else if (rttm.rtt <= m[2].rtt) + m[2] = rttm; + + elapsed = now - m[0].ts; + if (unlikely(elapsed > wlen)) { + /* Passed entire window without a new min so make 2nd choice + * the new min & 3rd choice the new 2nd. So forth and so on. + */ + m[0] = m[1]; + m[1] = m[2]; + m[2] = rttm; + if (now - m[0].ts > wlen) { + m[0] = m[1]; + m[1] = rttm; + if (now - m[0].ts > wlen) + m[0] = rttm; + } + } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { + /* Passed a quarter of the window without a new min so + * take 2nd choice from the 2nd quarter of the window. + */ + m[2] = m[1] = rttm; + } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { + /* Passed half the window without a new min so take the 3rd + * choice from the last half of the window. + */ + m[2] = rttm; + } } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, - long seq_rtt_us, long sack_rtt_us) + long seq_rtt_us, long sack_rtt_us, + long ca_rtt_us) { const struct tcp_sock *tp = tcp_sk(sk); @@ -2925,9 +2944,6 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, * Karn's algorithm forbids taking RTT if some retransmitted data * is acked (RFC6298). 
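For readers who want to exercise the windowed min-filter above outside the kernel, here is a minimal user-space sketch of the same update rule, assuming a caller-supplied monotonic clock in place of jiffies and a plain uint32_t sample stream; all names here are hypothetical, not kernel API:

#include <stdint.h>

struct min_meas { uint32_t val, ts; };

/* Three staggered candidates, as in tcp_update_rtt_min() above: m[0] is
 * the windowed minimum, m[1] and m[2] are fallbacks that take over once
 * m[0] ages out of the window of length wlen.
 */
static void win_min_update(struct min_meas m[3], uint32_t val,
                           uint32_t now, uint32_t wlen)
{
        struct min_meas s = { .val = val ? val : 1, .ts = now };
        uint32_t elapsed;

        if (s.val <= m[0].val)          /* new overall min: restart fresh */
                m[0] = m[1] = m[2] = s;
        else if (s.val <= m[1].val)     /* new 2nd best */
                m[1] = m[2] = s;
        else if (s.val <= m[2].val)     /* new 3rd best */
                m[2] = s;

        elapsed = now - m[0].ts;
        if (elapsed > wlen) {           /* min aged out: promote the rest */
                m[0] = m[1];
                m[1] = m[2];
                m[2] = s;
                if (now - m[0].ts > wlen) {
                        m[0] = m[1];
                        m[1] = s;
                        if (now - m[0].ts > wlen)
                                m[0] = s;
                }
        } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
                m[2] = m[1] = s;        /* refresh the staggered fallbacks */
        } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
                m[2] = s;
        }
}

Seeding all three entries with UINT32_MAX makes the first sample the initial minimum, which is what the ~0U initialization of rtt_min in the tcp_create_openreq_child() hunk further below achieves.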
*/ - if (flag & FLAG_RETRANS_DATA_ACKED) - seq_rtt_us = -1L; - if (seq_rtt_us < 0) seq_rtt_us = sack_rtt_us; @@ -2939,11 +2955,16 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, */ if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) - seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - tp->rx_opt.rcv_tsecr); - + seq_rtt_us = ca_rtt_us = jiffies_to_usecs(tcp_time_stamp - + tp->rx_opt.rcv_tsecr); if (seq_rtt_us < 0) return false; + /* ca_rtt_us >= 0 is counting on the invariant that ca_rtt_us is + * always taken together with ACK, SACK, or TS-opts. Any negative + * values will be skipped with the seq_rtt_us < 0 check above. + */ + tcp_update_rtt_min(sk, ca_rtt_us); tcp_rtt_estimator(sk, seq_rtt_us); tcp_set_rto(sk); @@ -2953,21 +2974,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, } /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ -static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) +void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - long seq_rtt_us = -1L; + long rtt_us = -1L; - if (synack_stamp && !tp->total_retrans) - seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); + if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) { + struct skb_mstamp now; - /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets - * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() - */ - if (!tp->srtt_us) - tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); + skb_mstamp_get(&now); + rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack); + } + + tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L, rtt_us); } + static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -3131,6 +3152,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= acked_pcount; + else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; @@ -3169,7 +3192,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, flag |= FLAG_SACK_RENEGING; skb_mstamp_get(&now); - if (likely(first_ackt.v64)) { + if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); } @@ -3178,7 +3201,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); } - rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us); + rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, + ca_rtt_us); if (flag & FLAG_ACKED) { tcp_rearm_rto(sk); @@ -5472,7 +5496,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, } static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -5698,15 +5722,14 @@ reset_and_undo: * address independent. 
*/ -int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) +int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcphdr *th = tcp_hdr(skb); struct request_sock *req; int queued = 0; bool acceptable; - u32 synack_stamp; tp->rx_opt.saw_tstamp = 0; @@ -5750,7 +5773,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; case TCP_SYN_SENT: - queued = tcp_rcv_synsent_state_process(sk, skb, th, len); + queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; @@ -5785,15 +5808,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (!acceptable) return 1; + if (!tp->srtt_us) + tcp_synack_rtt_meas(sk, req); + /* Once we leave TCP_SYN_RECV, we no longer need req * so release it. */ if (req) { - synack_stamp = tcp_rsk(req)->snt_synack; tp->total_retrans = req->num_retrans; reqsk_fastopen_remove(sk, req, false); } else { - synack_stamp = tp->lsndtime; /* Make sure socket is routed, for correct metrics. */ icsk->icsk_af_ops->rebuild_header(sk); tcp_init_congestion_control(sk); @@ -5816,7 +5840,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_synack_rtt_meas(sk, synack_stamp); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -6023,11 +6046,11 @@ static void tcp_openreq_init(struct request_sock *req, { struct inet_request_sock *ireq = inet_rsk(req); - req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ + req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */ req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; - tcp_rsk(req)->snt_synack = tcp_time_stamp; + skb_mstamp_get(&tcp_rsk(req)->snt_synack); tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; @@ -6043,9 +6066,11 @@ static void tcp_openreq_init(struct request_sock *req, } struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, - struct sock *sk_listener) + struct sock *sk_listener, + bool attach_listener) { - struct request_sock *req = reqsk_alloc(ops, sk_listener); + struct request_sock *req = reqsk_alloc(ops, sk_listener, + attach_listener); if (req) { struct inet_request_sock *ireq = inet_rsk(req); @@ -6065,13 +6090,13 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(struct sock *sk, +static bool tcp_syn_flood_action(const struct sock *sk, const struct sk_buff *skb, const char *proto) { + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; bool want_cookie = false; - struct listen_sock *lopt; #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { @@ -6082,12 +6107,12 @@ static bool tcp_syn_flood_action(struct sock *sk, #endif NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; - if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) { - lopt->synflood_warned = 1; + if (!queue->synflood_warned && + sysctl_tcp_syncookies != 2 && + xchg(&queue->synflood_warned, 1) == 0) pr_info("%s: Possible SYN flooding on port %d. %s. 
Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); - } + return want_cookie; } @@ -6112,16 +6137,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) { + struct tcp_fastopen_cookie foc = { .len = -1 }; + __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; - struct request_sock *req; struct tcp_sock *tp = tcp_sk(sk); + struct sock *fastopen_sk = NULL; struct dst_entry *dst = NULL; - __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; - bool want_cookie = false, fastopen; + struct request_sock *req; + bool want_cookie = false; struct flowi fl; - struct tcp_fastopen_cookie foc = { .len = -1 }; - int err; - /* TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -6145,7 +6169,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop; } - req = inet_reqsk_alloc(rsk_ops, sk); + req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie); if (!req) goto drop; @@ -6228,20 +6252,30 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, } tcp_rsk(req)->snt_isn = isn; + tcp_rsk(req)->txhash = net_tx_rndhash(); tcp_openreq_init_rwin(req, sk, dst); - fastopen = !want_cookie && - tcp_try_fastopen(sk, skb, req, &foc, dst); - err = af_ops->send_synack(sk, dst, &fl, req, - skb_get_queue_mapping(skb), &foc); - if (!fastopen) { - if (err || want_cookie) - goto drop_and_free; - + if (!want_cookie) { + tcp_reqsk_record_syn(sk, req, skb); + fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); + } + if (fastopen_sk) { + af_ops->send_synack(fastopen_sk, dst, &fl, req, + &foc, false); + /* Add the child socket directly into the accept queue */ + inet_csk_reqsk_queue_add(sk, req, fastopen_sk); + sk->sk_data_ready(sk); + bh_unlock_sock(fastopen_sk); + sock_put(fastopen_sk); + } else { tcp_rsk(req)->tfo_listener = false; - af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + if (!want_cookie) + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + af_ops->send_synack(sk, dst, &fl, req, + &foc, !want_cookie); + if (want_cookie) + goto drop_and_free; } - tcp_reqsk_record_syn(sk, req, skb); - + reqsk_put(req); return 0; drop_and_release: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 93898e093d4e..950e28c0cdf2 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - reqsk_put(req); } else { /* * Still in SYN_RECV, just remove it silently. @@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq) * created socket, and POSIX does not want network * errors returned from accept(). */ - NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); inet_csk_reqsk_queue_drop(req->rsk_listener, req); + NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); } + reqsk_put(req); } EXPORT_SYMBOL(tcp_req_err); @@ -576,7 +576,7 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. 
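The tcp_syn_flood_action() rewrite in the hunk just above also changes how the one-time "Possible SYN flooding" message is emitted: instead of a racy test-then-set on lopt->synflood_warned, an atomic xchg() guarantees that exactly one CPU observes the 0 -> 1 transition and prints, while the plain read in front merely keeps the already-warned path cheap. A user-space sketch of the same idiom, using C11 atomics in place of the kernel's xchg() (illustrative names only):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int warned;

/* Emit the message exactly once, however many threads race here.
 * The relaxed load is an optimization: once the flag is set it lets
 * callers skip the atomic read-modify-write entirely.
 */
static void warn_once(const char *msg)
{
        if (atomic_load_explicit(&warned, memory_order_relaxed) == 0 &&
            atomic_exchange(&warned, 1) == 0)
                fprintf(stderr, "%s\n", msg);
}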
*/ -static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { @@ -795,7 +795,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV @@ -803,7 +803,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, 0, @@ -818,11 +818,11 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. */ -static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, +static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, - u16 queue_mapping, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + bool attach_req) { const struct inet_request_sock *ireq = inet_rsk(req); struct flowi4 fl4; @@ -833,12 +833,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, foc); + skb = tcp_make_synack(sk, dst, req, foc, attach_req); if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); - skb_set_queue_mapping(skb, queue_mapping); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, ireq->opt); @@ -865,7 +864,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ /* Find the Key structure for an address. */ -struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, +struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, const union tcp_md5_addr *addr, int family) { @@ -877,7 +876,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, /* caller either holds rcu_read_lock() or socket lock */ md5sig = rcu_dereference_check(tp->md5sig_info, sock_owned_by_user(sk) || - lockdep_is_held(&sk->sk_lock.slock)); + lockdep_is_held((spinlock_t *)&sk->sk_lock.slock)); if (!md5sig) return NULL; #if IS_ENABLED(CONFIG_IPV6) @@ -894,7 +893,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk, } EXPORT_SYMBOL(tcp_md5_do_lookup); -struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, +struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { const union tcp_md5_addr *addr; @@ -1112,10 +1111,13 @@ clear_hash_noput: } EXPORT_SYMBOL(tcp_v4_md5_hash_skb); +#endif + /* Called with rcu_read_lock() */ -static bool tcp_v4_inbound_md5_hash(struct sock *sk, +static bool tcp_v4_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb) { +#ifdef CONFIG_TCP_MD5SIG /* * This gets called for each TCP segment that arrives * so we want to be efficient. 
@@ -1165,10 +1167,12 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk, return true; } return false; -} #endif + return false; +} -static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, +static void tcp_v4_init_req(struct request_sock *req, + const struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); @@ -1179,7 +1183,8 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener, ireq->opt = tcp_v4_save_options(skb); } -static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl, +static struct dst_entry *tcp_v4_route_req(const struct sock *sk, + struct flowi *fl, const struct request_sock *req, bool *strict) { @@ -1218,7 +1223,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .route_req = tcp_v4_route_req, .init_seq = tcp_v4_init_sequence, .send_synack = tcp_v4_send_synack, - .queue_hash_add = inet_csk_reqsk_queue_hash_add, }; int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) @@ -1241,9 +1245,11 @@ EXPORT_SYMBOL(tcp_v4_conn_request); * The three way handshake has completed - we got a valid synack - * now create the new socket. */ -struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; struct inet_sock *newinet; @@ -1277,7 +1283,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; inet_csk(newsk)->icsk_ext_hdr_len = 0; - sk_set_txhash(newsk); if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = newtp->write_seq ^ jiffies; @@ -1320,7 +1325,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - __inet_hash_nolisten(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + if (*own_req) + tcp_move_syn(newtp, req); return newsk; @@ -1338,34 +1345,11 @@ put_and_exit: } EXPORT_SYMBOL(tcp_v4_syn_recv_sock); -static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) +static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); - const struct iphdr *iph = ip_hdr(skb); - struct request_sock *req; - struct sock *nsk; - - req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); - if (req) { - nsk = tcp_check_req(sk, skb, req, false); - if (!nsk || nsk == sk) - reqsk_put(req); - return nsk; - } - - nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, - th->source, iph->daddr, th->dest, inet_iif(skb)); - - if (nsk) { - if (nsk->sk_state != TCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - inet_twsk_put(inet_twsk(nsk)); - return NULL; - } -#ifdef CONFIG_SYN_COOKIES if (!th->syn) sk = cookie_v4_check(sk, skb); #endif @@ -1373,7 +1357,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) } /* The socket must have its spinlock held when we get - * here. + * here, unless it is a TCP_LISTEN socket. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. 
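One effect of moving the CONFIG_TCP_MD5SIG guard inside tcp_v4_inbound_md5_hash() (top of this hunk) is that call sites such as tcp_v4_rcv() no longer need their own #ifdef: with the option disabled the helper degenerates to return false and the optimizer discards the call. The shape of the pattern, reduced to a generic sketch (all names hypothetical):

#include <stdbool.h>

struct pkt;                                     /* opaque packet type for the sketch */
bool do_policy_check(const struct pkt *p);      /* only linked in when the feature is on */

/* Keep the config #ifdef inside a single helper so every caller
 * stays free of conditional compilation.
 */
static bool pkt_fails_policy_check(const struct pkt *p)
{
#ifdef CONFIG_POLICY_CHECK
        return do_policy_check(p);
#else
        (void)p;                                /* feature compiled out */
        return false;
#endif
}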
@@ -1404,13 +1388,13 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto csum_err; if (sk->sk_state == TCP_LISTEN) { - struct sock *nsk = tcp_v4_hnd_req(sk, skb); + struct sock *nsk = tcp_v4_cookie_check(sk, skb); + if (!nsk) goto discard; - if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(sk, skb); + sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1420,7 +1404,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { + if (tcp_rcv_state_process(sk, skb)) { rsk = sk; goto reset; } @@ -1590,6 +1574,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; +lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; @@ -1598,6 +1583,33 @@ process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + struct sock *nsk = NULL; + + sk = req->rsk_listener; + if (tcp_v4_inbound_md5_hash(sk, skb)) + goto discard_and_relse; + if (likely(sk->sk_state == TCP_LISTEN)) { + nsk = tcp_check_req(sk, skb, req, false); + } else { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + if (!nsk) { + reqsk_put(req); + goto discard_it; + } + if (nsk == sk) { + sock_hold(sk); + reqsk_put(req); + } else if (tcp_child_process(sk, nsk, skb)) { + tcp_v4_send_reset(nsk, skb); + goto discard_it; + } else { + return 0; + } + } if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; @@ -1606,25 +1618,23 @@ process: if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; -#ifdef CONFIG_TCP_MD5SIG - /* - * We really want to reject the packet as early as possible - * if: - * o We're expecting an MD5'd packet and this is no MD5 tcp option - * o There is an MD5 option and we're not expecting one - */ if (tcp_v4_inbound_md5_hash(sk, skb)) goto discard_and_relse; -#endif nf_reset(skb); if (sk_filter(sk, skb)) goto discard_and_relse; - sk_incoming_cpu_update(sk); skb->dev = NULL; + if (sk->sk_state == TCP_LISTEN) { + ret = tcp_v4_do_rcv(sk, skb); + goto put_and_return; + } + + sk_incoming_cpu_update(sk); + bh_lock_sock_nested(sk); tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; @@ -1639,6 +1649,7 @@ process: } bh_unlock_sock(sk); +put_and_return: sock_put(sk); return ret; @@ -1833,35 +1844,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) ++st->num; ++st->offset; - if (st->state == TCP_SEQ_STATE_OPENREQ) { - struct request_sock *req = cur; - - icsk = inet_csk(st->syn_wait_sk); - req = req->dl_next; - while (1) { - while (req) { - if (req->rsk_ops->family == st->family) { - cur = req; - goto out; - } - req = req->dl_next; - } - if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) - break; -get_req: - req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; - } - sk = sk_nulls_next(st->syn_wait_sk); - st->state = TCP_SEQ_STATE_LISTENING; - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - } else { - icsk = inet_csk(sk); - spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - if (reqsk_queue_len(&icsk->icsk_accept_queue)) - goto start_req; - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - sk = sk_nulls_next(sk); - } + sk = sk_nulls_next(sk); get_sk: 
sk_nulls_for_each_from(sk, node) { if (!net_eq(sock_net(sk), net)) @@ -1871,16 +1854,6 @@ get_sk: goto out; } icsk = inet_csk(sk); - spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - if (reqsk_queue_len(&icsk->icsk_accept_queue)) { -start_req: - st->uid = sock_i_uid(sk); - st->syn_wait_sk = sk; - st->state = TCP_SEQ_STATE_OPENREQ; - st->sbucket = 0; - goto get_req; - } - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } spin_unlock_bh(&ilb->lock); st->offset = 0; @@ -2012,7 +1985,6 @@ static void *tcp_seek_last_pos(struct seq_file *seq) void *rc = NULL; switch (st->state) { - case TCP_SEQ_STATE_OPENREQ: case TCP_SEQ_STATE_LISTENING: if (st->bucket >= INET_LHTABLE_SIZE) break; @@ -2071,7 +2043,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } switch (st->state) { - case TCP_SEQ_STATE_OPENREQ: case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { @@ -2096,11 +2067,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) struct tcp_iter_state *st = seq->private; switch (st->state) { - case TCP_SEQ_STATE_OPENREQ: - if (v) { - struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); @@ -2154,7 +2120,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct request_sock *req, - struct seq_file *f, int i, kuid_t uid) + struct seq_file *f, int i) { const struct inet_request_sock *ireq = inet_rsk(req); long delta = req->rsk_timer.expires - jiffies; @@ -2171,7 +2137,8 @@ static void get_openreq4(const struct request_sock *req, 1, /* timers active (only the expire timer) */ jiffies_delta_to_clock_t(delta), req->num_timeout, - from_kuid_munged(seq_user_ns(f), uid), + from_kuid_munged(seq_user_ns(f), + sock_i_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, @@ -2185,7 +2152,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_sock *inet = inet_sk(sk); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); @@ -2272,18 +2239,12 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) } st = seq->private; - switch (st->state) { - case TCP_SEQ_STATE_LISTENING: - case TCP_SEQ_STATE_ESTABLISHED: - if (sk->sk_state == TCP_TIME_WAIT) - get_timewait4_sock(v, seq, st->num); - else - get_tcp4_sock(v, seq, st->num); - break; - case TCP_SEQ_STATE_OPENREQ: - get_openreq4(v, seq, st->num, st->uid); - break; - } + if (sk->sk_state == TCP_TIME_WAIT) + get_timewait4_sock(v, seq, st->num); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + get_openreq4(v, seq, st->num); + else + get_tcp4_sock(v, seq, st->num); out: seq_pad(seq, '\n'); return 0; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index def765911ff8..ac6b1961ffeb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -361,30 +361,38 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +/* Warning : This function is called without sk_listener being locked. 
 + * Be sure to read socket fields once, as their values could change under us. + */ void tcp_openreq_init_rwin(struct request_sock *req, - struct sock *sk, struct dst_entry *dst) + const struct sock *sk_listener, + const struct dst_entry *dst) { struct inet_request_sock *ireq = inet_rsk(req); - struct tcp_sock *tp = tcp_sk(sk); - __u8 rcv_wscale; + const struct tcp_sock *tp = tcp_sk(sk_listener); + u16 user_mss = READ_ONCE(tp->rx_opt.user_mss); + int full_space = tcp_full_space(sk_listener); int mss = dst_metric_advmss(dst); + u32 window_clamp; + __u8 rcv_wscale; - if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) - mss = tp->rx_opt.user_mss; + if (user_mss && user_mss < mss) + mss = user_mss; + window_clamp = READ_ONCE(tp->window_clamp); /* Set this up on the first call only */ - req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); + req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* limit the window selection if the user enforces a smaller rx buffer */ - if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && - (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0)) - req->window_clamp = tcp_full_space(sk); + if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK && + (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0)) + req->rsk_window_clamp = full_space; /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(tcp_full_space(sk), + tcp_select_initial_window(full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), - &req->rcv_wnd, - &req->window_clamp, + &req->rsk_rcv_wnd, + &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); @@ -433,7 +441,9 @@ EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); * Actually, we could avoid lots of memory writes here. tp of listening * socket contains all necessary default parameters. 
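Since tcp_openreq_init_rwin() now runs with the listener unlocked, it snapshots user_mss and window_clamp exactly once via READ_ONCE() and only ever works with the local copies; reading a field twice could observe two different values if the owner rewrites it concurrently. A minimal sketch of the idiom, approximating the kernel's READ_ONCE() with a volatile cast (sketch only, not the real macro, which also handles wider types):

/* one-shot read through a volatile lvalue: the compiler must perform
 * exactly one load and may not re-read the field later
 */
#define READ_ONCE_U16(x) (*(const volatile unsigned short *)&(x))

struct listener {
        unsigned short user_mss;        /* may be rewritten by the socket owner */
};

static int effective_mss(const struct listener *l, int path_mss)
{
        unsigned short user_mss = READ_ONCE_U16(l->user_mss);

        /* every use below sees the same snapshot */
        return (user_mss && user_mss < path_mss) ? user_mss : path_mss;
}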
*/ -struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) +struct sock *tcp_create_openreq_child(const struct sock *sk, + struct request_sock *req, + struct sk_buff *skb) { struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); @@ -460,6 +470,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); + newtp->rtt_min[0].rtt = ~0U; newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; @@ -469,7 +480,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; - newtp->lsndtime = treq->snt_synack; + newtp->lsndtime = treq->snt_synack.stamp_jiffies; + newsk->sk_txhash = treq->txhash; newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; @@ -501,9 +513,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if (sysctl_tcp_fack) tcp_enable_fack(newtp); } - newtp->window_clamp = req->window_clamp; - newtp->rcv_ssthresh = req->rcv_wnd; - newtp->rcv_wnd = req->rcv_wnd; + newtp->window_clamp = req->rsk_window_clamp; + newtp->rcv_ssthresh = req->rsk_rcv_wnd; + newtp->rcv_wnd = req->rsk_rcv_wnd; newtp->rx_opt.wscale_ok = ireq->wscale_ok; if (newtp->rx_opt.wscale_ok) { newtp->rx_opt.snd_wscale = ireq->snd_wscale; @@ -536,9 +548,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_ecn_openreq_child(newtp, req); newtp->fastopen_rsk = NULL; newtp->syn_data_acked = 0; - - newtp->saved_syn = req->saved_syn; - req->saved_syn = NULL; + newtp->rack.mstamp.v64 = 0; + newtp->rack.advanced = 0; TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } @@ -566,8 +577,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; - - BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); + bool own_req; tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { @@ -698,7 +708,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* RFC793: "first check sequence number". */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { + tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_ack(sk, skb, req); @@ -755,16 +765,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * ESTABLISHED STATE. If it will be dropped after * socket is created, wait for troubles. */ - child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL); + child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL, + req, &own_req); if (!child) goto listen_overflow; - inet_csk_reqsk_queue_drop(sk, req); - inet_csk_reqsk_queue_add(sk, req, child); - /* Warning: caller must not call reqsk_put(req); - * child stole last reference on it. 
- */ - return child; + sock_rps_save_rxhash(child, skb); + tcp_synack_rtt_meas(child, req); + return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: if (!sysctl_tcp_abort_on_overflow) { @@ -811,8 +819,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int state = child->sk_state; if (!sock_owned_by_user(child)) { - ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), - skb->len); + ret = tcp_rcv_state_process(child, skb); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3dbee0d83b15..cb7ca569052c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -357,14 +357,10 @@ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) } static void -tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th, - struct sock *sk) +tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) { - if (inet_rsk(req)->ecn_ok) { + if (inet_rsk(req)->ecn_ok) th->ece = 1; - if (tcp_ca_needs_ecn(sk)) - INET_ECN_xmit(sk); - } } /* Set up ECN state for a packet on a ESTABLISHED socket that is about to @@ -612,12 +608,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } /* Set up TCP options for SYN-ACKs. */ -static unsigned int tcp_synack_options(struct sock *sk, - struct request_sock *req, - unsigned int mss, struct sk_buff *skb, - struct tcp_out_options *opts, - const struct tcp_md5sig_key *md5, - struct tcp_fastopen_cookie *foc) +static unsigned int tcp_synack_options(struct request_sock *req, + unsigned int mss, struct sk_buff *skb, + struct tcp_out_options *opts, + const struct tcp_md5sig_key *md5, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -1827,7 +1822,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, /* Ok, it looks like it is advisable to defer. */ - if (cong_win < send_win && cong_win < skb->len) + if (cong_win < send_win && cong_win <= skb->len) *is_cwnd_limited = true; return true; @@ -2060,7 +2055,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) { - is_cwnd_limited = true; if (push_one == 2) /* Force out a loss probe pkt. */ cwnd_quota = 1; @@ -2142,6 +2136,7 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); tcp_cwnd_validate(sk, is_cwnd_limited); return false; } @@ -2165,7 +2160,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ - if (sk->sk_state == TCP_SYN_RECV) + if (tp->fastopen_rsk) return false; /* TLP is only scheduled when next timer event is RTO. */ @@ -2175,7 +2170,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. 
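To make the probe-timeout arithmetic in the next hunk concrete (assuming the usual HZ-based values TCP_DELACK_MAX = 200 ms and TCP_TIMEOUT_INIT = 1 s): with srtt = 100 ms the probe is scheduled 2 * rtt = 200 ms out; with a single packet in flight it is deferred to max(200 ms, 100 ms + 50 ms + 200 ms) = 350 ms so that a normal delayed ACK cannot trigger a spurious probe; and when no RTT sample exists yet, rtt << 1 evaluates to 0, so the new ?: fallback arms the timer at TCP_TIMEOUT_INIT instead of firing immediately.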
 */ - if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out || + if (sysctl_tcp_early_retrans < 3 || !tp->packets_out || !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) return false; @@ -2184,9 +2179,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) return false; /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account - * for delayed ack when there's one outstanding packet. + * for delayed ack when there's one outstanding packet. If no RTT + * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1; + timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; if (tp->packets_out == 1) timeout = max_t(u32, timeout, (rtt + (rtt >> 1) + TCP_DELACK_MAX)); @@ -2659,8 +2655,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) net_dbg_ratelimited("retrans_out leaked\n"); } #endif - if (!tp->retrans_out) - tp->lost_retrans_low = tp->snd_nxt; TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; tp->retrans_out += tcp_skb_pcount(skb); @@ -2668,10 +2662,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = tcp_skb_timestamp(skb); - /* snd_nxt is stored to detect loss of retransmitted segment, - * see tcp_input.c tcp_sacktag_write_queue(). - */ - TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; } else if (err != -EBUSY) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } @@ -2949,20 +2939,22 @@ int tcp_send_synack(struct sock *sk) * Allocate one skb and build a SYNACK packet. * @dst is consumed: Caller should not use it again. */ -struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, +struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + bool attach_req) { - struct tcp_out_options opts; struct inet_request_sock *ireq = inet_rsk(req); - struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th; - struct sk_buff *skb; + const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *md5 = NULL; + struct tcp_out_options opts; + struct sk_buff *skb; int tcp_header_size; + struct tcphdr *th; + u16 user_mss; int mss; - skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2970,11 +2962,21 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* Reserve space for headers. */ skb_reserve(skb, MAX_TCP_HEADER); + if (attach_req) { + skb_set_owner_w(skb, req_to_sk(req)); + } else { + /* sk is a const pointer because multiple cpus might call us + * concurrently; sk->sk_wmem_alloc is an atomic, so it is safe + * to promote the pointer to read-write. 
+ */ + skb_set_owner_w(skb, (struct sock *)sk); + } skb_dst_set(skb, dst); mss = dst_metric_advmss(dst); - if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) - mss = tp->rx_opt.user_mss; + user_mss = READ_ONCE(tp->rx_opt.user_mss); + if (user_mss && user_mss < mss) + mss = user_mss; memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES @@ -2988,8 +2990,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, rcu_read_lock(); md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif - tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, - foc) + sizeof(*th); + skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); + tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -2998,7 +3001,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, memset(th, 0, sizeof(struct tcphdr)); th->syn = 1; th->ack = 1; - tcp_ecn_make_synack(req, th, sk); + tcp_ecn_make_synack(req, th); th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; /* Setting of flags is superfluous here for callers (and ECE is @@ -3012,8 +3015,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ - th->window = htons(min(req->rcv_wnd, 65535U)); - tcp_options_write((__be32 *)(th + 1), tp, &opts); + th->window = htons(min(req->rsk_rcv_wnd, 65535U)); + tcp_options_write((__be32 *)(th + 1), NULL, &opts); th->doff = (tcp_header_size >> 2); TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS); @@ -3500,13 +3503,14 @@ void tcp_send_probe0(struct sock *sk) TCP_RTO_MAX); } -int tcp_rtx_synack(struct sock *sk, struct request_sock *req) +int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) { const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific; struct flowi fl; int res; - res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); + tcp_rsk(req)->txhash = net_tx_rndhash(); + res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true); if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c new file mode 100644 index 000000000000..5353085fd0b2 --- /dev/null +++ b/net/ipv4/tcp_recovery.c @@ -0,0 +1,109 @@ +#include <linux/tcp.h> +#include <net/tcp.h> + +int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOST_RETRANS; + +/* Marks a packet lost if some packet sent later has been (s)acked. + * The underlying idea is similar to the traditional dupthresh and FACK + * but they look at different metrics: + * + * dupthresh: 3 OOO packets delivered (packet count) + * FACK: sequence delta to highest sacked sequence (sequence space) + * RACK: sent time delta to the latest delivered packet (time domain) + * + * The advantage of RACK is it applies to both original and retransmitted + * packets and therefore is robust against tail losses. Another advantage + * is being more resilient to reordering by simply allowing some + * "settling delay", instead of tweaking the dupthresh. + * + * The current version is only used after recovery starts but can be + * easily extended to detect the first loss. 
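The per-skb test that tcp_rack_mark_lost() applies (function body follows) reduces to a single time-domain predicate: a packet is presumed lost when the most recently delivered packet was sent sufficiently later than it. Stated as standalone C (hypothetical flat types; the kernel compares struct skb_mstamp pairs):

#include <stdbool.h>
#include <stdint.h>

/* RACK loss predicate: pkt_sent_us is when the candidate packet was
 * (re)sent, last_acked_sent_us is the send time of the most recently
 * (s)acked packet, reo_wnd_us is the reordering settling window.
 */
static bool rack_lost(uint64_t pkt_sent_us, uint64_t last_acked_sent_us,
                      uint64_t reo_wnd_us)
{
        return last_acked_sent_us > pkt_sent_us &&
               last_acked_sent_us - pkt_sent_us > reo_wnd_us;
}

With reo_wnd = min_rtt/4, a packet reordered by less than a quarter of the minimum RTT is tolerated, whereas dupthresh-based detection would need its packet counter tuned per path.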
 + */ +int tcp_rack_mark_lost(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + u32 reo_wnd, prior_retrans = tp->retrans_out; + + if (inet_csk(sk)->icsk_ca_state < TCP_CA_Recovery || !tp->rack.advanced) + return 0; + + /* Reset the advanced flag to avoid unnecessary queue scanning */ + tp->rack.advanced = 0; + + /* To be more reordering resilient, allow min_rtt/4 settling delay + * (lower-bounded to 1000us). We use min_rtt instead of the smoothed + * RTT because reordering is often a path property and less related + * to queuing or delayed ACKs. + * + * TODO: measure and adapt to the observed reordering delay, and + * use a timer to retransmit like the delayed early retransmit. + */ + reo_wnd = 1000; + if (tp->rack.reord && tcp_min_rtt(tp) != ~0U) + reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); + + tcp_for_write_queue(skb, sk) { + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + + if (skb == tcp_send_head(sk)) + break; + + /* Skip ones already (s)acked */ + if (!after(scb->end_seq, tp->snd_una) || + scb->sacked & TCPCB_SACKED_ACKED) + continue; + + if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) { + + if (skb_mstamp_us_delta(&tp->rack.mstamp, + &skb->skb_mstamp) <= reo_wnd) + continue; + + /* skb is lost if packet sent later is sacked */ + tcp_skb_mark_lost_uncond_verify(tp, skb); + if (scb->sacked & TCPCB_SACKED_RETRANS) { + scb->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPLOSTRETRANSMIT); + } + } else if (!(scb->sacked & TCPCB_RETRANS)) { + /* Original data are sent sequentially so stop early + * because the rest are all sent after rack_sent + */ + break; + } + } + return prior_retrans - tp->retrans_out; +} + +/* Record the most recently (re)sent time among the (s)acked packets */ +void tcp_rack_advance(struct tcp_sock *tp, + const struct skb_mstamp *xmit_time, u8 sacked) +{ + if (tp->rack.mstamp.v64 && + !skb_mstamp_after(xmit_time, &tp->rack.mstamp)) + return; + + if (sacked & TCPCB_RETRANS) { + struct skb_mstamp now; + + /* If the sacked packet was retransmitted, it's ambiguous + * whether the retransmission or the original (or the prior + * retransmission) was sacked. + * + * If the original is lost, there is no ambiguity. Otherwise + * we assume the original can be delayed up to aRTT + min_rtt. + * The aRTT term is bounded by the fast recovery or timeout, + * so it's at least one RTT (i.e., retransmission is at least + * an RTT later). + */ + skb_mstamp_get(&now); + if (skb_mstamp_us_delta(&now, xmit_time) < tcp_min_rtt(tp)) + return; + } + + tp->rack.mstamp = *xmit_time; + tp->rack.advanced = 1; +} diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7149ebc820c7..c9c716a483e4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -83,7 +83,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) } /* Calculate maximal number of retries on an orphaned socket. */ -static int tcp_orphan_retries(struct sock *sk, int alive) +static int tcp_orphan_retries(struct sock *sk, bool alive) { int retries = sysctl_tcp_orphan_retries; /* May be zero. 
*/ @@ -184,7 +184,7 @@ static int tcp_write_timeout(struct sock *sk) retry_until = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - const int alive = icsk->icsk_rto < TCP_RTO_MAX; + const bool alive = icsk->icsk_rto < TCP_RTO_MAX; retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || @@ -298,7 +298,7 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; + const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX; max_probes = tcp_orphan_retries(sk, alive); if (!alive && icsk->icsk_backoff >= max_probes) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f7d1d5e19e95..24ec14f9825c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -375,7 +375,8 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } - + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; return score; } @@ -419,6 +420,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, score += 4; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; + return score; } @@ -1017,30 +1021,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - /* unconnected socket. If output device is enslaved to a VRF - * device lookup source address from VRF table. This mimics - * behavior of ip_route_connect{_init}. - */ - if (netif_index_is_vrf(net, ipc.oif)) { - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, - RT_SCOPE_UNIVERSE, sk->sk_protocol, - (flow_flags | FLOWI_FLAG_VRFSRC | - FLOWI_FLAG_SKIP_NH_OIF), - faddr, saddr, dport, - inet->inet_sport); - - rt = ip_route_output_flow(net, fl4, sk); - if (!IS_ERR(rt)) { - saddr = fl4->saddr; - ip_rt_put(rt); - } - } - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport); + if (!saddr && ipc.oif) + l3mdev_get_saddr(net, ipc.oif, fl4); + security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 60b032f58ccc..62e1e72db461 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -22,7 +22,8 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb) +static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); @@ -52,8 +53,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 41a261355662..7ee6518afa86 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -82,24 +82,25 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } -static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; #ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output_sk(sk, skb); + 
return dst_output(net, sk, skb); } #endif return x->outer_mode->afinfo->output_finish(sk, skb); } -int xfrm4_output(struct sock *sk, struct sk_buff *skb) +int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, - NULL, skb_dst(skb)->dev, __xfrm4_output, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index c10a9ee68433..1e0c3c835a63 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,7 @@ #include <net/dst.h> #include <net/xfrm.h> #include <net/ip.h> -#include <net/vrf.h> +#include <net/l3mdev.h> static struct xfrm_policy_afinfo xfrm4_policy_afinfo; @@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; + xdst->u.rt.rt_table_id = rt->rt_table_id; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); return 0; @@ -110,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? - : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; @@ -128,7 +127,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) case IPPROTO_DCCP: if (xprth + 4 < skb->data || pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ports = (__be16 *)xprth; + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; fl4->fl4_sport = ports[!!reverse]; fl4->fl4_dport = ports[!reverse]; @@ -136,8 +138,12 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ICMP: - if (pskb_may_pull(skb, xprth + 2 - skb->data)) { - u8 *icmp = xprth; + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; fl4->fl4_icmp_type = icmp[0]; fl4->fl4_icmp_code = icmp[1]; @@ -145,33 +151,50 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) break; case IPPROTO_ESP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be32 *ehdr = (__be32 *)xprth; + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ehdr[0]; } break; case IPPROTO_AH: - if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - __be32 *ah_hdr = (__be32 *)xprth; + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = (__be32 *)xprth; fl4->fl4_ipsec_spi = ah_hdr[1]; } break; case IPPROTO_COMP: - if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - __be16 *ipcomp_hdr = (__be16 *)xprth; + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; case IPPROTO_GRE: - if (pskb_may_pull(skb, xprth + 12 - skb->data)) { - __be16 *greflags = (__be16 *)xprth; - __be32 *gre_hdr = (__be32 *)xprth; + if 
(xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; if (greflags[0] & GRE_KEY) { if (greflags[0] & GRE_CSUM) @@ -245,7 +268,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 36b85bd05ac8..d84742f003a9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include <net/ip.h> #include <net/netlink.h> #include <net/pkt_sched.h> +#include <net/l3mdev.h> #include <linux/if_tunnel.h> #include <linux/rtnetlink.h> #include <linux/netconf.h> @@ -417,6 +418,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (err) { ipv6_mc_destroy_dev(ndev); del_timer(&ndev->regen_timer); + snmp6_unregister_dev(ndev); goto err_release; } /* protected by rtnl_lock */ @@ -2146,7 +2148,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -2179,8 +2181,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX; - table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2211,7 +2214,7 @@ out: static void addrconf_add_mroute(struct net_device *dev) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_LOCAL, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL, .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_dst_len = 8, @@ -3029,6 +3032,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { struct in6_addr addr; + /* no link local addresses on L3 master devices */ + if (netif_is_l3_master(idev->dev)) + return; + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { @@ -3141,6 +3148,32 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } break; + case NETDEV_CHANGEMTU: + /* if MTU under IPV6_MIN_MTU stop IPv6 on this interface. */ + if (dev->mtu < IPV6_MIN_MTU) { + addrconf_ifdown(dev, 1); + break; + } + + if (idev) { + rt6_mtu_change(dev, dev->mtu); + idev->cnf.mtu6 = dev->mtu; + break; + } + + /* allocate new idev */ + idev = ipv6_add_dev(dev); + if (IS_ERR(idev)) + break; + + /* device is still not ready */ + if (!(idev->if_flags & IF_READY)) + break; + + run_pending = 1; + + /* fall through */ + case NETDEV_UP: case NETDEV_CHANGE: if (dev->flags & IFF_SLAVE) @@ -3164,7 +3197,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, idev->if_flags |= IF_READY; run_pending = 1; } - } else { + } else if (event == NETDEV_CHANGE) { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. 
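The recurring "xprth = skb_network_header(skb) + iph->ihl * 4;" re-derivations added to _decode_session4() above are the substance of that fix: pskb_may_pull() may reallocate the skb head, so a pointer computed into the packet before the call can dangle afterwards and must be recomputed before use. The same hazard exists with any reallocating API; a small user-space analogue (hypothetical names):

#include <stdlib.h>
#include <string.h>

struct buf { unsigned char *data; size_t len; };

/* Analogue of pskb_may_pull(): may realloc() the backing store,
 * invalidating every pointer previously derived from b->data.
 */
static int ensure_len(struct buf *b, size_t need)
{
        unsigned char *n;

        if (b->len >= need)
                return 1;
        n = realloc(b->data, need);
        if (!n)
                return 0;
        memset(n + b->len, 0, need - b->len);
        b->data = n;
        b->len = need;
        return 1;
}

static unsigned char read_byte(struct buf *b, size_t off)
{
        unsigned char *p;

        if (!ensure_len(b, off + 1))
                return 0;
        p = b->data + off;      /* derive only after the possible realloc */
        return *p;
}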
*/ break; @@ -3229,24 +3262,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } break; - case NETDEV_CHANGEMTU: - if (idev && dev->mtu >= IPV6_MIN_MTU) { - rt6_mtu_change(dev, dev->mtu); - idev->cnf.mtu6 = dev->mtu; - break; - } - - if (!idev && dev->mtu >= IPV6_MIN_MTU) { - idev = ipv6_add_dev(dev); - if (!IS_ERR(idev)) - break; - } - - /* - * if MTU under IPV6_MIN_MTU. - * Stop IPv6 on this interface. - */ - case NETDEV_DOWN: case NETDEV_UNREGISTER: /* @@ -3627,7 +3642,7 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL); + ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL); out: in6_ifa_put(ifp); rtnl_unlock(); @@ -4731,7 +4746,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, } } -static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) +static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev, + u32 ext_filter_mask) { struct nlattr *nla; struct ifla_cacheinfo ci; @@ -4751,6 +4767,9 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) /* XXX - MC not implemented */ + if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS) + return 0; + nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); if (!nla) goto nla_put_failure; @@ -4778,7 +4797,8 @@ nla_put_failure: return -EMSGSIZE; } -static size_t inet6_get_link_af_size(const struct net_device *dev) +static size_t inet6_get_link_af_size(const struct net_device *dev, + u32 ext_filter_mask) { if (!__in6_dev_get(dev)) return 0; @@ -4786,14 +4806,15 @@ static size_t inet6_get_link_af_size(const struct net_device *dev) return inet6_ifla6_size(); } -static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) +static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev, + u32 ext_filter_mask) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev) return -ENODATA; - if (inet6_fill_ifla6_attrs(skb, idev) < 0) + if (inet6_fill_ifla6_attrs(skb, idev, ext_filter_mask) < 0) return -EMSGSIZE; return 0; @@ -4948,7 +4969,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, if (!protoinfo) goto nla_put_failure; - if (inet6_fill_ifla6_attrs(skb, idev) < 0) + if (inet6_fill_ifla6_attrs(skb, idev, 0) < 0) goto nla_put_failure; nla_nest_end(skb, protoinfo); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9aadd57808a5..d70b0238f468 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -263,7 +263,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info) { - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct ipv6hdr *iph; struct sk_buff *skb; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 6c2b2132c8d3..36c5a98b0472 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -68,6 +68,7 @@ #include <net/xfrm.h> #include <net/inet_common.h> #include <net/dsfield.h> +#include <net/l3mdev.h> #include <asm/uaccess.h> @@ -452,7 +453,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * and anycast addresses will be checked later. 
*/ if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { - net_dbg_ratelimited("icmp6_send: addr_any/mcast source\n"); + net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); return; } @@ -460,7 +462,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) * Never answer to a ICMP packet. */ if (is_ineligible(skb)) { - net_dbg_ratelimited("icmp6_send: no reply to icmp error\n"); + net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); return; } @@ -496,6 +499,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; + if (!fl6.flowi6_oif) + fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev); + dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) goto out; @@ -509,7 +515,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) len = skb->len - msg.offset; len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr)); if (len < 0) { - net_dbg_ratelimited("icmp: len problem\n"); + net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n", + &hdr->saddr, &hdr->daddr); goto out_dst_release; } @@ -575,7 +582,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -781,7 +788,8 @@ static int icmpv6_rcv(struct sk_buff *skb) if (type & ICMPV6_INFOMSG_MASK) break; - net_dbg_ratelimited("icmpv6: msg of unknown type\n"); + net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n", + saddr, daddr); /* * error of unknown type. 
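[Editor's note] The icmp.c hunks above do two independent things: the ratelimited debug messages now include the offending source/destination pair via the %pI6c compact-IPv6 format, and VRF awareness is threaded into the route lookups. A minimal sketch of the oif fallback added to icmp6_send(), assuming the l3mdev helpers of this series (l3mdev_master_ifindex() returns 0 when the device has no L3 master, so behaviour outside VRFs is unchanged); addr_is_ours stands in for the pre-existing source-address check and is hypothetical:

	/* sketch: flowi6_oif selection in icmp6_send() after this change */
	if (addr_is_ours)			/* saddr bound to this host */
		fl6.flowi6_oif = skb->dev->ifindex;
	else if (!fl6.flowi6_oif)
		fl6.flowi6_oif = np->ucast_oif;	/* per-socket unicast oif */
	if (!fl6.flowi6_oif)			/* new: fall back to VRF master */
		fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);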
diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c index 678d2df4b8d9..1a6852e1ac69 100644 --- a/net/ipv6/ila.c +++ b/net/ipv6/ila.c @@ -91,7 +91,7 @@ static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p) *(__be64 *)&ip6h->daddr = p->locator; } -static int ila_output(struct sock *sk, struct sk_buff *skb) +static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -100,7 +100,7 @@ static int ila_output(struct sock *sk, struct sk_buff *skb) update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate)); - return dst->lwtstate->orig_output(sk, skb); + return dst->lwtstate->orig_output(net, sk, skb); drop: kfree_skb(skb); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 6927f3fb5597..5d1c7cee2cb2 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -65,17 +65,18 @@ int inet6_csk_bind_conflict(const struct sock *sk, } EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); -struct dst_entry *inet6_csk_route_req(struct sock *sk, +struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, - const struct request_sock *req) + const struct request_sock *req, + u8 proto) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); - fl6->flowi6_proto = IPPROTO_TCP; + fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; @@ -91,73 +92,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, return dst; } - -/* - * request_sock (formerly open request) hash tables. 
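[Editor's note: the block deleted here removes the per-listener SYN-queue hash — inet6_synq_hash(), inet6_csk_search_req() and inet6_csk_reqsk_queue_hash_add(). In this series embryonic request socks are instead found via the regular established-hash lookup, so the IPv6-private syn_table and its syn_wait_lock become dead code. inet6_csk_route_req() itself survives, gains a "proto" argument in place of the hard-coded IPPROTO_TCP, and is exported for non-TCP users.]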
- */ -static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, - const u32 rnd, const u32 synq_hsize) -{ - u32 c; - - c = jhash_3words((__force u32)raddr->s6_addr32[0], - (__force u32)raddr->s6_addr32[1], - (__force u32)raddr->s6_addr32[2], - rnd); - - c = jhash_2words((__force u32)raddr->s6_addr32[3], - (__force u32)rport, - c); - - return c & (synq_hsize - 1); -} - -struct request_sock *inet6_csk_search_req(struct sock *sk, - const __be16 rport, - const struct in6_addr *raddr, - const struct in6_addr *laddr, - const int iif) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req; - u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries); - - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { - const struct inet_request_sock *ireq = inet_rsk(req); - - if (ireq->ir_rmt_port == rport && - req->rsk_ops->family == AF_INET6 && - ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && - ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && - (!ireq->ir_iif || ireq->ir_iif == iif)) { - atomic_inc(&req->rsk_refcnt); - WARN_ON(req->sk != NULL); - break; - } - } - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - - return req; -} -EXPORT_SYMBOL_GPL(inet6_csk_search_req); - -void inet6_csk_reqsk_queue_hash_add(struct sock *sk, - struct request_sock *req, - const unsigned long timeout) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, - inet_rsk(req)->ir_rmt_port, - lopt->hash_rnd, lopt->nr_table_entries); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); - inet_csk_reqsk_queue_added(sk, timeout); -} -EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); +EXPORT_SYMBOL(inet6_csk_route_req); void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 6ac8dad0138a..21ace5a2bf7c 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -114,6 +114,8 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; } return score; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6cedc62b2abb..0c7e276c230e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) return NULL; } +EXPORT_SYMBOL_GPL(fib6_get_table); static void __net_init fib6_tables_init(struct net *net) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index adba03ac7ce9..9075acf081dd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,7 +47,7 @@ #include <net/inet_ecn.h> #include <net/dst_metadata.h> -int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) +int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -109,7 +109,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; - IP6_ADD_STATS_BH(dev_net(dev), idev, + IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_NOECTPKTS + (ipv6_get_dsfield(hdr) & INET_ECN_MASK), max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); @@ -183,8 +183,8 @@ int 
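/* [Editor's note, illustrative only] The ip6_input.c hunks show the
 * pattern repeated throughout this series: NF_HOOK() takes the network
 * namespace explicitly and the okfn callbacks grow a matching parameter,
 * so handlers stop deriving it from skb_dst() or dev_net(). Schematically:
 *
 *   old: NF_HOOK(pf, hook, sk, skb, in, out, okfn)
 *        with  int okfn(struct sock *sk, struct sk_buff *skb);
 *   new: NF_HOOK(pf, hook, net, sk, skb, in, out, okfn)
 *        with  int okfn(struct net *net, struct sock *sk, struct sk_buff *skb);
 */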
ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, - dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + net, NULL, skb, dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -199,9 +199,8 @@ drop: */ -static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) +static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; @@ -278,8 +277,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, - skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 08b62047c67f..eeca943f12dc 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -264,6 +264,9 @@ static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; + if (skb->encapsulation) + skb_set_inner_network_header(skb, nhoff); + iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); rcu_read_lock(); @@ -280,6 +283,13 @@ out_unlock: return err; } +static int sit_gro_complete(struct sk_buff *skb, int nhoff) +{ + skb->encapsulation = 1; + skb_shinfo(skb)->gso_type |= SKB_GSO_SIT; + return ipv6_gro_complete(skb, nhoff); +} + static struct packet_offload ipv6_packet_offload __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .callbacks = { @@ -292,6 +302,8 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, + .gro_receive = ipv6_gro_receive, + .gro_complete = sit_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f84ec4e9b2de..e6a7bd15b9b7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -55,8 +55,9 @@ #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> +#include <net/l3mdev.h> -static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && - ((mroute6_socket(dev_net(dev), skb) && + ((mroute6_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { @@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - sk, newskb, NULL, newskb->dev, + net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { - IP6_INC_STATS(dev_net(dev), idev, + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, - skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && @@ -116,48 +116,49 @@ static int 
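/* [Editor's note, sketch of the resulting transmit chain; assumes the
 * dst_output() signature introduced by this series]
 *
 *   ip6_xmit()
 *     -> NF_HOOK(NF_INET_LOCAL_OUT, net, sk, skb, ..., dst_output)
 *   dst_output(net, sk, skb)                 (dst->output == ip6_output)
 *     -> ip6_output(net, sk, skb)
 *        -> NF_HOOK_COND(NF_INET_POST_ROUTING, ..., ip6_finish_output)
 *           -> ip6_fragment(net, sk, skb, ip6_finish_output2)
 *              or ip6_finish_output2(net, sk, skb)
 */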
ip6_finish_output2(struct sock *sk, struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) +static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(sk, skb, ip6_finish_output2); + return ip6_fragment(net, sk, skb, ip6_finish_output2); else - return ip6_finish_output2(sk, skb); + return ip6_finish_output2(net, sk, skb); } -int ip6_output(struct sock *sk, struct sk_buff *skb) +int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); + if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(dev), idev, - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* - * xmit an sk_buff (used by TCP, SCTP and DCCP) + * xmit an sk_buff (used by TCP, SCTP and DCCP) + * Note : socket lock is not held for SYNACK packets, but might be modified + * by calls to skb_set_owner_w() and ipv6_local_error(), + * which are using proper atomic operations or spinlocks. */ - -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, +int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; @@ -186,7 +187,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, } consume_skb(skb); skb = skb2; - skb_set_owner_w(skb, sk); + /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically, + * it is safe to call in our context (socket lock not held) + */ + skb_set_owner_w(skb, (struct sock *)sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -224,12 +228,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, dst_output_sk); + /* hooks should never assume socket lock is held. 
+ * we promote our socket to non const + */ + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, (struct sock *)sk, skb, NULL, dst->dev, + dst_output); } skb->dev = dst->dev; - ipv6_local_error(sk, EMSGSIZE, fl6, mtu); + /* ipv6_local_error() does not require socket lock, + * we promote our socket to non const + */ + ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -317,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6_forward_finish(struct net *net, struct sock *sk, + struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -515,8 +528,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: @@ -543,8 +556,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_fragment(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)) +int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -557,7 +570,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; - struct net *net = dev_net(skb_dst(skb)->dev); hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -591,6 +603,10 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr); + if (skb->ip_summed == CHECKSUM_PARTIAL && + (err = skb_checksum_help(skb))) + goto fail; + hroom = LL_RESERVED_SPACE(rt->dst.dev); if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); @@ -679,7 +695,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb, ip6_copy_metadata(frag, skb); } - err = output(sk, skb); + err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -719,10 +735,6 @@ slow_path_clean: } slow_path: - if ((skb->ip_summed == CHECKSUM_PARTIAL) && - skb_checksum_help(skb)) - goto fail; - left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ @@ -807,7 +819,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(sk, frag); + err = output(net, sk, frag); if (err) goto fail; @@ -889,7 +901,7 @@ out: return dst; } -static int ip6_dst_lookup_tail(struct net *net, struct sock *sk, +static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -1020,7 +1032,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * It returns a valid dst pointer on success, or a pointer encoded * error code. 
*/ -struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, +struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; @@ -1032,7 +1044,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, if (final_dst) fl6->daddr = *final_dst; if (!fl6->flowi6_oif) - fl6->flowi6_oif = dst->dev->ifindex; + fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1258,6 +1270,7 @@ static int __ip6_append_data(struct sock *sk, struct rt6_info *rt = (struct rt6_info *)cork->dst; struct ipv6_txoptions *opt = v6_cork->opt; int csummode = CHECKSUM_NONE; + unsigned int maxnonfragsize, headersize; skb = skb_peek_tail(queue); if (!skb) { @@ -1275,38 +1288,43 @@ static int __ip6_append_data(struct sock *sk, maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); - if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - unsigned int maxnonfragsize, headersize; - - headersize = sizeof(struct ipv6hdr) + - (opt ? opt->opt_flen + opt->opt_nflen : 0) + - (dst_allfrag(&rt->dst) ? - sizeof(struct frag_hdr) : 0) + - rt->rt6i_nfheader_len; - - if (ip6_sk_ignore_df(sk)) - maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; - else - maxnonfragsize = mtu; + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + if (cork->length + length > mtu - headersize && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } - /* dontfrag active */ - if ((cork->length + length > mtu - headersize) && dontfrag && - (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu - headersize + - sizeof(struct ipv6hdr)); - goto emsgsize; - } + if (ip6_sk_ignore_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; - if (cork->length + length > maxnonfragsize - headersize) { + if (cork->length + length > maxnonfragsize - headersize) { emsgsize: - ipv6_local_error(sk, EMSGSIZE, fl6, - mtu - headersize + - sizeof(struct ipv6hdr)); - return -EMSGSIZE; - } + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); + return -EMSGSIZE; } + /* CHECKSUM_PARTIAL only with no extension headers and when + * we are not going to fragment + */ + if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && + headersize == sizeof(struct ipv6hdr) && + length < mtu - headersize && + !(flags & MSG_MORE) && + rt->dst.dev->features & NETIF_F_V6_CSUM) + csummode = CHECKSUM_PARTIAL; + if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) { sock_tx_timestamp(sk, &tx_flags); if (tx_flags & SKBTX_ANY_SW_TSTAMP && @@ -1314,16 +1332,6 @@ emsgsize: tskey = sk->sk_tskey++; } - /* If this is the first and only packet and device - * supports checksum offloading, let's use it. - * Use transhdrlen, same as IPv4, because partial - * sums only work when transhdrlen is set. - */ - if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && - length + fragheaderlen < mtu && - rt->dst.dev->features & NETIF_F_V6_CSUM && - !exthdrlen) - csummode = CHECKSUM_PARTIAL; /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. 
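[Editor's note] The __ip6_append_data() rework above is easy to misread in diff form: the headersize/maxnonfragsize computation moves out of the old "mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN" guard so the RXPMTU and EMSGSIZE checks now always run, and the checksum-offload decision is both relocated and tightened. A sketch of the new eligibility test, names as in the hunk (illustrative restatement, not additional code):

	/* CHECKSUM_PARTIAL only when nothing can force fragmentation or
	 * extension headers: single UDP datagram, no options or frag
	 * header, fits in the MTU, no MSG_MORE, and the device can
	 * checksum IPv6.
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM)
		csummode = CHECKSUM_PARTIAL;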
@@ -1686,7 +1694,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); int err; - err = ip6_local_out(skb); + err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) err = net_xmit_errno(err); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0224c032dca5..0a8610b33d79 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -482,7 +482,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) return -EMSGSIZE; } - err = dst_output(skb); + err = dst_output(t->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0e004cc42a22..ad19136086dd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1985,13 +1985,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } /* @@ -2063,8 +2063,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, - skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, + net, NULL, skb, skb->dev, dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 083b2927fc67..124338a39e29 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1645,8 +1645,8 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net->ipv6.igmp_sk, skb, NULL, skb->dev, - dst_output_sk); + net, net->ipv6.igmp_sk, skb, NULL, skb->dev, + dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -2008,8 +2008,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, skb->dev, dst_output_sk); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb->dev, + dst_output); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index b9779d441b12..60c79a08e14a 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -118,7 +118,7 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) struct mip6_report_rate_limiter { spinlock_t lock; - struct timeval stamp; + ktime_t stamp; int iif; struct in6_addr src; struct in6_addr dst; @@ -184,20 +184,18 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static inline int mip6_report_rl_allow(struct timeval *stamp, +static inline int mip6_report_rl_allow(ktime_t stamp, const struct in6_addr *dst, const struct in6_addr *src, int iif) { int allow = 0; spin_lock_bh(&mip6_report_rl.lock); - if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || - mip6_report_rl.stamp.tv_usec != stamp->tv_usec || + if (!ktime_equal(mip6_report_rl.stamp, stamp) || mip6_report_rl.iif != iif || 
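/* [Editor's note] timeval -> ktime_t in the MIPv6 report rate limiter:
 * the stamp is now taken by value from skb_get_ktime() and compared as a
 * single quantity with ktime_equal(), replacing the field-by-field
 * tv_sec/tv_usec comparison and store. */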
!ipv6_addr_equal(&mip6_report_rl.src, src) || !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { - mip6_report_rl.stamp.tv_sec = stamp->tv_sec; - mip6_report_rl.stamp.tv_usec = stamp->tv_usec; + mip6_report_rl.stamp = stamp; mip6_report_rl.iif = iif; mip6_report_rl.src = *src; mip6_report_rl.dst = *dst; @@ -216,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; int offset; - struct timeval stamp; + ktime_t stamp; int err = 0; if (unlikely(fl6->flowi6_proto == IPPROTO_MH && @@ -230,9 +228,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, (skb_network_header(skb) + offset); } - skb_get_timestamp(skb, &stamp); + stamp = skb_get_ktime(skb); - if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr, + if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr, hao ? &hao->addr : &ipv6_hdr(skb)->saddr, opt->iif)) goto out; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 64a71354b069..3e0f855e1bea 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,6 +67,7 @@ #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> +#include <net/l3mdev.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> @@ -147,6 +148,7 @@ struct neigh_table nd_tbl = { .gc_thresh2 = 512, .gc_thresh3 = 1024, }; +EXPORT_SYMBOL_GPL(nd_tbl); static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data) { @@ -441,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; + int oif = l3mdev_fib_oif(skb->dev); - icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex); + icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -463,9 +468,9 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, dst->dev, - dst_output_sk); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, dst->dev, + dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); @@ -474,8 +479,7 @@ static void ndisc_send_skb(struct sk_buff *skb, rcu_read_unlock(); } -void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *daddr, +void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt) { @@ -541,7 +545,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { - ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr, + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); @@ -551,8 +555,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) in6_dev_put(idev); } -void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, - const struct in6_addr *solicit, +void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, struct sk_buff *oskb) { @@ -679,12 +682,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in 
NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, neigh, target, target, saddr, skb); + ndisc_send_ns(dev, target, target, saddr, skb); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb); + ndisc_send_ns(dev, target, &mcaddr, saddr, skb); } } @@ -768,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { - +have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { /* @@ -794,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb) } else { struct net *net = dev_net(dev); + /* perhaps an address on the master device */ + if (netif_is_l3_slave(dev)) { + struct net_device *mdev; + + mdev = netdev_master_upper_dev_get_rcu(dev); + if (mdev) { + ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); + if (ifp) + goto have_ifp; + } + } + idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ @@ -828,7 +843,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) is_router = idev->cnf.forwarding; if (dad) { - ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target, + ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, !!is_router, false, (ifp != NULL), true); goto out; } @@ -849,8 +864,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->header_ops) { - ndisc_send_na(dev, neigh, saddr, &msg->target, - !!is_router, + ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); @@ -1486,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; + int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1502,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); + &saddr_buf, &ipv6_hdr(skb)->saddr, oif); + + if (oif != skb->dev->ifindex) + fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index b4de08a83e0b..d11c46833d61 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,9 +18,8 @@ #include <net/ip6_checksum.h> #include <net/netfilter/nf_queue.h> -int ip6_route_me_harder(struct sk_buff *skb) +int ip6_route_me_harder(struct net *net, struct sk_buff *skb) { - struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); unsigned int hh_len; struct dst_entry *dst; @@ -93,7 +92,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, } } -static int nf_ip6_reroute(struct sk_buff *skb, +static int nf_ip6_reroute(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry) { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); @@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || skb->mark != rt_info->mark) - return ip6_route_me_harder(skb); + return ip6_route_me_harder(net, skb); } return 0; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0771991ed812..99425cf2819b 100644 --- 
a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -117,7 +117,7 @@ ip6_packet_match(const struct sk_buff *skb, if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ip6info->iniface, - ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":""); + ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : ""); return false; } @@ -126,14 +126,14 @@ ip6_packet_match(const struct sk_buff *skb, if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ip6info->outiface, - ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":""); + ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : ""); return false; } /* ... might want to do something with class and flowlabel here ... */ /* look for the desired protocol header */ - if((ip6info->flags & IP6T_F_PROTO)) { + if (ip6info->flags & IP6T_F_PROTO) { int protohdr; unsigned short _frag_off; @@ -151,9 +151,9 @@ ip6_packet_match(const struct sk_buff *skb, ip6info->proto); if (ip6info->proto == protohdr) { - if(ip6info->invflags & IP6T_INV_PROTO) { + if (ip6info->invflags & IP6T_INV_PROTO) return false; - } + return true; } @@ -275,7 +275,8 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e, return 0; } -static void trace_packet(const struct sk_buff *skb, +static void trace_packet(struct net *net, + const struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -287,7 +288,6 @@ static void trace_packet(const struct sk_buff *skb, const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; - struct net *net = dev_net(in ? in : out); root = get_entry(private->entries, private->hook_entry[hook]); @@ -314,10 +314,10 @@ ip6t_next_entry(const struct ip6t_entry *entry) /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table) { + unsigned int hook = state->hook; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; @@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; + acpar.net = state->net; acpar.in = state->in; acpar.out = state->out; acpar.family = NFPROTO_IPV6; @@ -401,8 +402,8 @@ ip6t_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, state->in, state->out, - table->name, private, e); + trace_packet(state->net, skb, hook, state->in, + state->out, table->name, private, e); #endif /* Standard target? 
*/ if (!t->u.kernel.target->target) { @@ -442,8 +443,8 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!acpar.hotdrop); - xt_write_recseq_end(addend); - local_bh_enable(); + xt_write_recseq_end(addend); + local_bh_enable(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -560,7 +561,7 @@ mark_source_chains(const struct xt_table_info *newinfo, pos = newpos; } } - next: +next: duprintf("Finished chain %u\n", hook); } return 1; @@ -815,7 +816,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) newinfo) */ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, - const struct ip6t_replace *repl) + const struct ip6t_replace *repl) { struct ip6t_entry *iter; unsigned int i; @@ -1089,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info, #endif static int get_info(struct net *net, void __user *user, - const int *len, int compat) + const int *len, int compat) { char name[XT_TABLE_MAXNAMELEN]; struct xt_table *t; @@ -1151,7 +1152,7 @@ static int get_info(struct net *net, void __user *user, static int get_entries(struct net *net, struct ip6t_get_entries __user *uptr, - const int *len) + const int *len) { int ret; struct ip6t_get_entries get; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0ed841a3fa33..db29bbf41b59 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -39,7 +39,7 @@ static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct ip6t_reject_info *reject = par->targinfo; - struct net *net = dev_net((par->in != NULL) ? par->in : par->out); + struct net *net = par->net; switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 1e4bf99ed16e..3deed5860a42 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -76,7 +76,7 @@ synproxy_send_tcp(const struct synproxy_net *snet, nf_conntrack_get(nfct); } - ip6_local_out(nskb); + ip6_local_out(net, nskb->sk, nskb); return; free_nskb: @@ -244,7 +244,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet, synproxy_build_options(nth, opts); synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, - niph, nth, tcp_hdr_size); + niph, nth, tcp_hdr_size); } static bool @@ -275,7 +275,7 @@ static unsigned int synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_synproxy_info *info = par->targinfo; - struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_net *snet = synproxy_pernet(par->net); struct synproxy_options opts = {}; struct tcphdr *th, _th; @@ -316,11 +316,11 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) return XT_CONTINUE; } -static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, +static unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? 
: nhs->out)); + struct synproxy_net *snet = synproxy_pernet(nhs->net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; @@ -458,14 +458,12 @@ static struct xt_target synproxy_tg6_reg __read_mostly = { static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { { .hook = ipv6_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv6_synproxy_hook, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index 790e0c6b19e1..1ee1b25df096 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -26,7 +26,7 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr) return addr_type & IPV6_ADDR_UNICAST; } -static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, +static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { struct rt6_info *rt; @@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, lookup_flags |= RT6_LOOKUP_F_IFACE; } - rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags); + rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags); if (rt->dst.error) goto out; @@ -93,7 +93,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ - return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert; + return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 5c33d8abc077..8b277b983ca5 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,12 +32,10 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_filter_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? 
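/* [Editor's note, schematic; summarizes the hook-API migration repeated
 * through the ip6table_* and conntrack files below]
 *
 *   old: fn(const struct nf_hook_ops *ops, skb, state)
 *        ... ops->hooknum ... dev_net(state->in ? state->in : state->out)
 *   new: fn(void *priv, skb, state)
 *        ... state->hook ... state->net
 *
 * The ".owner = THIS_MODULE" initializers are dropped alongside this;
 * per-hook module refcounting was removed from the netfilter core (an
 * inference from context — the deletions themselves are visible in each
 * nf_hook_ops array). */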
state->in : state->out); - - return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index b551f5b79fe2..abe278b07932 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, - dev_net(state->out)->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -66,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } @@ -76,17 +75,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_mangle_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - if (ops->hooknum == NF_INET_LOCAL_OUT) + if (state->hook == NF_INET_LOCAL_OUT) return ip6t_mangle_out(skb, state); - if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, state, - dev_net(state->out)->ipv6.ip6table_mangle); + if (state->hook == NF_INET_POST_ROUTING) + return ip6t_do_table(skb, state, + state->net->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, state, - dev_net(state->in)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index c3a7f7af0ed4..de2a10a565f5 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -30,49 +30,46 @@ static const struct xt_table nf_nat_ipv6_table = { .af = NFPROTO_IPV6, }; -static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { - struct net *net = nf_ct_net(ct); - - return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat); } -static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, +static unsigned 
int ip6table_nat_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain); } -static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, +static unsigned int ip6table_nat_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_NAT_DST, @@ -80,7 +77,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = ip6table_nat_out, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_NAT_SRC, @@ -88,7 +84,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_local_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST, @@ -96,7 +91,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { /* After packet filtering, change source */ { .hook = ip6table_nat_fn, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 0b33caad2b69..9021963565c3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,12 +19,10 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_raw_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? state->in : state->out); - - return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index fcef83c25f7b..0d856fedfeb0 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,13 +36,10 @@ static const struct xt_table security_table = { }; static unsigned int -ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip6table_security_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - const struct net *net = dev_net(state->in ? 
state->in : state->out); - - return ip6t_do_table(skb, ops->hooknum, state, - net->ipv6.ip6table_security); + return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security); } static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 7302900c321a..1aa5848764a7 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_helper(const struct nf_hook_ops *ops, +static unsigned int ipv6_helper(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, return helper->help(skb, protoff, ct, ctinfo); } -static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, +static unsigned int ipv6_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -165,14 +165,14 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } -static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, +static unsigned int ipv6_conntrack_local(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -181,48 +181,42 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { { .hook = ipv6_conntrack_in, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_conntrack_local, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, { .hook = ipv6_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_HELPER, }, { .hook = ipv6_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP6_PRI_LAST, }, { .hook = ipv6_helper, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_CONNTRACK_HELPER, }, { .hook = ipv6_confirm, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_LAST-1, diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 0e6fae103d33..660bc10c7a9c 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -36,6 +36,7 @@ static inline struct nf_icmp_net *icmpv6_pernet(struct net *net) static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct icmp6hdr *hp; @@ -56,12 +57,12 @@ static const u_int8_t invmap[] = { [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, [ICMPV6_NI_QUERY - 128] = 
ICMPV6_NI_REPLY + 1, - [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1 + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1 }; static const u_int8_t noct_valid_new[] = { [ICMPV6_MGM_QUERY - 130] = 1, - [ICMPV6_MGM_REPORT -130] = 1, + [ICMPV6_MGM_REPORT - 130] = 1, [ICMPV6_MGM_REDUCTION - 130] = 1, [NDISC_ROUTER_SOLICITATION - 130] = 1, [NDISC_ROUTER_ADVERTISEMENT - 130] = 1, @@ -159,7 +160,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, skb_network_offset(skb) + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr), - PF_INET6, &origtuple)) { + PF_INET6, net, &origtuple)) { pr_debug("icmpv6_error: Can't get tuple\n"); return -NF_ACCEPT; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c7196ad1d69f..d5efeb87350e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -59,7 +59,7 @@ struct nf_ct_frag6_skb_cb struct sk_buff *orig; }; -#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) +#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb)) static struct inet_frags nf_frags; @@ -445,7 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - for (fp=head->next; fp; fp = fp->next) { + for (fp = head->next; fp; fp = fp->next) { head->data_len += fp->len; head->len += fp->len; if (head->ip_summed != fp->ip_summed) @@ -563,12 +563,10 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) return 0; } -struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) +struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; - struct net *net = skb_dst(skb) ? 
dev_net(skb_dst(skb)->dev) - : dev_net(skb->dev); struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr; @@ -650,12 +648,18 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig); static int nf_ct_net_init(struct net *net) { + int res; + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->nf_frag.frags); - - return nf_ct_frag6_sysctl_register(net); + res = inet_frags_init_net(&net->nf_frag.frags); + if (res) + return res; + res = nf_ct_frag6_sysctl_register(net); + if (res) + inet_frags_uninit_net(&net->nf_frag.frags); + return res; } static void nf_ct_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index 6d9c0b3d5b8c..4fdbed5ebfb6 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -51,7 +51,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, return IP6_DEFRAG_CONNTRACK_OUT + zone_id; } -static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, +static unsigned int ipv6_defrag(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -63,7 +63,8 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, return NF_ACCEPT; #endif - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb)); + reasm = nf_ct_frag6_gather(state->net, skb, + nf_ct6_defrag_user(state->hook, skb)); /* queued */ if (reasm == NULL) return NF_STOLEN; @@ -74,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, + NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); @@ -84,14 +85,12 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, static struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, }, { .hook = ipv6_defrag, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index c8ab626556a0..6989c70ae29f 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -19,25 +19,10 @@ #include <net/netfilter/nf_conntrack.h> #endif -static struct net *pick_net(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_NS - const struct dst_entry *dst; - - if (skb->dev != NULL) - return dev_net(skb->dev); - dst = skb_dst(skb); - if (dst != NULL && dst->dev != NULL) - return dev_net(dst->dev); -#endif - return &init_net; -} - -static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, - int oif) +static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, + const struct in6_addr *gw, int oif) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct net *net = pick_net(skb); struct dst_entry *dst; struct flowi6 fl6; @@ -61,7 +46,7 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw, return true; } -void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum, +void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum, const struct in6_addr *gw, int oif) { if (this_cpu_read(nf_skb_duplicated)) @@ -81,9 +66,9 @@ void nf_dup_ipv6(struct sk_buff 
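/* [Editor's note] pick_net(), deleted above, guessed the namespace from
 * skb->dev, then skb_dst()->dev, else fell back to &init_net. Callers of
 * nf_dup_ipv6() now pass the namespace explicitly, which removes the
 * guesswork and stays correct for packets carrying neither a device nor
 * a dst. */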
*skb, unsigned int hooknum, struct ipv6hdr *iph = ipv6_hdr(skb); --iph->hop_limit; } - if (nf_dup_ipv6_route(skb, gw, oif)) { + if (nf_dup_ipv6_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); - ip6_local_out(skb); + ip6_local_out(net, skb->sk, skb); __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 70fbaed49edb..238e70c3f7b7 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); unsigned int -nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -272,7 +272,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, struct nf_conn *ct; enum ip_conntrack_info ctinfo; struct nf_conn_nat *nat; - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook); __be16 frag_off; int hdrlen; u8 nexthdr; @@ -303,7 +303,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, - ops->hooknum, + state->hook, hdrlen)) return NF_DROP; else @@ -317,21 +317,21 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, state, ct); + ret = do_chain(priv, skb, state, ct); if (ret != NF_ACCEPT) return ret; - if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) + if (nf_nat_initialized(ct, HOOK2MANIP(state->hook))) break; - ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + ret = nf_nat_alloc_null_binding(ct, state->hook); if (ret != NF_ACCEPT) return ret; } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? 
"SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } break; @@ -340,11 +340,11 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) + if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + return nf_nat_packet(ct, ctinfo, state->hook, skb); oif_changed: nf_ct_kill_acct(ct, ctinfo, skb); @@ -353,9 +353,9 @@ oif_changed: EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); unsigned int -nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); unsigned int -nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -403,7 +403,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all)) { - err = nf_xfrm_me_harder(skb, AF_INET6); + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } @@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); unsigned int -nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, +nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, - unsigned int (*do_chain)(const struct nf_hook_ops *ops, + unsigned int (*do_chain)(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct)) @@ -430,14 +430,14 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); + ret = nf_nat_ipv6_fn(priv, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, 
&ct->tuplehash[!dir].tuple.src.u3)) { - err = ip6_route_me_harder(skb); + err = ip6_route_me_harder(state->net, skb); if (err < 0) ret = NF_DROP_ERR(err); } @@ -446,7 +446,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 && ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) { - err = nf_xfrm_me_harder(skb, AF_INET6); + err = nf_xfrm_me_harder(state->net, skb, AF_INET6); if (err < 0) ret = NF_DROP_ERR(err); } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 7745609665cd..31ba7ca19757 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -34,7 +34,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY)); - if (ipv6_dev_get_saddr(dev_net(out), out, + if (ipv6_dev_get_saddr(nf_ct_net(ct), out, &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 94b4c6dfb400..e0f922b777e3 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -26,7 +26,7 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + tcphoff = ipv6_skip_exthdr(oldskb, ((u8 *)(oip6h + 1) - oldskb->data), &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { @@ -206,7 +206,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) dev_queue_xmit(nskb); } else #endif - ip6_local_out(nskb); + ip6_local_out(net, nskb->sk, nskb); } EXPORT_SYMBOL_GPL(nf_send_reset6); @@ -224,7 +224,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook) return true; proto = ip6h->nexthdr; - thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo); if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) return false; diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index c8148ba76d1a..120ea9131be0 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,20 +16,20 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_ipv6.h> -static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, +static unsigned int nft_do_chain_ipv6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) + if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, +static unsigned int nft_ipv6_output(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, state); + return nft_do_chain_ipv6(priv, skb, state); } struct nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 951bb458b7bd..443cd306c0b0 100644 --- 
a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -24,44 +24,44 @@ #include <net/netfilter/nf_nat_l3proto.h> #include <net/ipv6.h> -static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, +static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb, const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_set_pktinfo_ipv6(&pkt, skb, state); - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } -static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_in(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain); } -static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, +static unsigned int nft_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 0dafdaac5e17..71d995ff3108 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -22,7 +22,7 @@ #include <net/netfilter/nf_tables_ipv6.h> #include <net/route.h> -static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +static unsigned int nf_route_table_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, u32 mark, flowlabel; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) + if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) return NF_DROP; /* save source/dest address, mark, hoplimit, flowlabel, priority */ @@ -45,14 +45,14 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain(&pkt, ops); + ret = nft_do_chain(&pkt, priv); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || skb->mark != mark || ipv6_hdr(skb)->hop_limit != hop_limit || flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + return ip6_route_me_harder(state->net, skb) == 0 ? 
ret : NF_DROP; return ret; } @@ -61,11 +61,11 @@ static const struct nf_chain_type nft_chain_route_ipv6 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, .family = NFPROTO_IPV6, - .owner = THIS_MODULE, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), .hooks = { - [NF_INET_LOCAL_OUT] = nf_route_table_hook, - }, + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 0eaa4f65fdea..8bfd470cbe72 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr, struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr]; int oif = regs->data[priv->sreg_dev]; - nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif); + nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif); } static int nft_dup_ipv6_init(const struct nft_ctx *ctx, diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index effd393bd517..aca44e89a881 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -35,8 +35,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr, range.flags |= priv->flags; - regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, - pkt->ops->hooknum); + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index d0d1540ecf87..533cd5719c59 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -24,15 +24,14 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ?
pkt->in : pkt->out); switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nf_send_reset6(pkt->net, pkt->skb, pkt->hook); break; default: break; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 928a0fb0b744..462f2a76b5c2 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -138,7 +138,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) EXPORT_SYMBOL(ip6_dst_hoplimit); #endif -static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) +int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int len; @@ -148,30 +148,20 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); -} - -int __ip6_local_out(struct sk_buff *skb) -{ - return __ip6_local_out_sk(skb->sk, skb); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net, sk, skb, NULL, skb_dst(skb)->dev, + dst_output); } EXPORT_SYMBOL_GPL(__ip6_local_out); -int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) +int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; - err = __ip6_local_out_sk(sk, skb); + err = __ip6_local_out(net, sk, skb); if (likely(err == 1)) - err = dst_output_sk(sk, skb); + err = dst_output(net, sk, skb); return err; } -EXPORT_SYMBOL_GPL(ip6_local_out_sk); - -int ip6_local_out(struct sk_buff *skb) -{ - return ip6_local_out_sk(skb->sk, skb); -} EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fdbada1569a3..dc65ec198f7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -614,6 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, unsigned int flags) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; @@ -652,9 +653,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; - IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, - NULL, rt->dst.dev, dst_output_sk); + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, + NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) @@ -666,7 +667,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); if (err == -ENOBUFS && !np->recverr) err = 0; return err; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f1159bb76e0a..44e21a03cfc3 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -706,13 +706,19 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { + int res; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - inet_frags_init_net(&net->ipv6.frags); - - return ip6_frags_ns_sysctl_register(net); + res = inet_frags_init_net(&net->ipv6.frags); + if (res) + return res; + res = 
ip6_frags_ns_sysctl_register(net); + if (res) + inet_frags_uninit_net(&net->ipv6.frags); + return res; } static void __net_exit ipv6_frags_exit_net(struct net *net) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 946880ad48ac..c8bc9b4ac328 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -61,6 +61,7 @@ #include <net/nexthop.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> +#include <net/l3mdev.h> #include <asm/uaccess.h> @@ -86,9 +87,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); +static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -303,7 +304,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard_sk, + .output = dst_discard_out, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -421,31 +422,7 @@ static bool rt6_check_expired(const struct rt6_info *rt) static int rt6_info_hash_nhsfn(unsigned int candidate_count, const struct flowi6 *fl6) { - unsigned int val = fl6->flowi6_proto; - - val ^= ipv6_addr_hash(&fl6->daddr); - val ^= ipv6_addr_hash(&fl6->saddr); - - /* Work only if this not encapsulated */ - switch (fl6->flowi6_proto) { - case IPPROTO_UDP: - case IPPROTO_TCP: - case IPPROTO_SCTP: - val ^= (__force u16)fl6->fl6_sport; - val ^= (__force u16)fl6->fl6_dport; - break; - - case IPPROTO_ICMPV6: - val ^= (__force u16)fl6->fl6_icmp_type; - val ^= (__force u16)fl6->fl6_icmp_code; - break; - } - /* RFC6438 recommands to use flowlabel */ - val ^= (__force u32)fl6->flowlabel; - - /* Perhaps, we need to tune, this function? 
*/ - val = val ^ (val >> 7) ^ (val >> 12); - return val % candidate_count; + return get_hash_from_flowi6(fl6) % candidate_count; } static struct rt6_info *rt6_multipath_select(struct rt6_info *match, @@ -498,10 +475,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (dev->flags & IFF_LOOPBACK) { if (!sprt->rt6i_idev || sprt->rt6i_idev->dev->ifindex != oif) { - if (flags & RT6_LOOKUP_F_IFACE && oif) + if (flags & RT6_LOOKUP_F_IFACE) continue; - if (local && (!oif || - local->rt6i_idev->dev->ifindex == oif)) + if (local && + local->rt6i_idev->dev->ifindex == oif) continue; } local = sprt; @@ -538,7 +515,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); + ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL); dev_put(work->dev); kfree(work); } @@ -1168,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = skb->dev->ifindex, + .flowi6_iif = l3mdev_fib_oif(skb->dev), .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -1192,9 +1169,14 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk, struct flowi6 *fl6) { + struct dst_entry *dst; int flags = 0; bool any_src; + dst = l3mdev_rt6_dst_by_oif(net, fl6); + if (dst) + return dst; + fl6->flowi6_iif = LOOPBACK_IFINDEX; any_src = ipv6_addr_any(&fl6->saddr); @@ -1223,7 +1205,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new = &rt->dst; new->__use = 1; new->input = dst_discard; - new->output = dst_discard_sk; + new->output = dst_discard_out; dst_copy_metrics(new, &ort->dst); rt->rt6i_idev = ort->rt6i_idev; @@ -1749,21 +1731,21 @@ static int ip6_convert_metrics(struct mx6_config *mxc, return -EINVAL; } -int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) +static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg) { - int err; struct net *net = cfg->fc_nlinfo.nl_net; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; int addr_type; + int err = -EINVAL; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) - return -EINVAL; + goto out; #ifndef CONFIG_IPV6_SUBTREES if (cfg->fc_src_len) - return -EINVAL; + goto out; #endif if (cfg->fc_ifindex) { err = -ENODEV; @@ -1878,7 +1860,7 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard_sk; + rt->dst.output = dst_discard_out; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -1983,9 +1965,7 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - *rt_ret = rt; - - return 0; + return rt; out: if (dev) dev_put(dev); @@ -1994,20 +1974,21 @@ out: if (rt) dst_free(&rt->dst); - *rt_ret = NULL; - - return err; + return ERR_PTR(err); } int ip6_route_add(struct fib6_config *cfg) { struct mx6_config mxc = { .mx = NULL, }; - struct rt6_info *rt = NULL; + struct rt6_info *rt; int err; - err = ip6_route_info_create(cfg, &rt); - if (err) + rt = ip6_route_info_create(cfg); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto out; + } err = ip6_convert_metrics(&mxc, cfg); if (err) @@ -2099,7 +2080,6 @@ static int 
ip6_route_del(struct fib6_config *cfg) static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; struct ndisc_options ndopts; @@ -2160,7 +2140,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu } rt = (struct rt6_info *) dst; - if (rt == net->ipv6.ip6_null_entry) { + if (rt->rt6i_flags & RTF_REJECT) { net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); return; } @@ -2289,7 +2269,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = ifindex, .fc_dst_len = prefixlen, @@ -2300,6 +2279,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; + cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2340,7 +2320,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -2387,7 +2367,8 @@ static void rtmsg_to_fib6_config(struct net *net, { memset(cfg, 0, sizeof(*cfg)); - cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? + : RT6_TABLE_MAIN; cfg->fc_ifindex = rtmsg->rtmsg_ifindex; cfg->fc_metric = rtmsg->rtmsg_metric; cfg->fc_expires = rtmsg->rtmsg_info; @@ -2471,7 +2452,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) +static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2482,7 +2463,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); @@ -2496,6 +2477,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast) { + u32 tb_id; struct net *net = dev_net(idev->dev); struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, DST_NOCOUNT); @@ -2518,7 +2500,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; - rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); + tb_id = l3mdev_fib_table(idev->dev) ? 
: RT6_TABLE_LOCAL; + rt->rt6i_table = fib6_get_table(net, tb_id); rt->dst.flags |= DST_NOCACHE; atomic_set(&rt->dst.__refcnt, 1); @@ -2897,9 +2880,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg) r_cfg.fc_encap_type = nla_get_u16(nla); } - err = ip6_route_info_create(&r_cfg, &rt); - if (err) + rt = ip6_route_info_create(&r_cfg); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto cleanup; + } err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); if (err) { @@ -3278,6 +3264,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; + if (netif_index_is_l3_master(net, oif)) { + fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | + FLOWI_FLAG_SKIP_NH_OIF; + } + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 94428fd85b2f..dcccae86190f 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - tunnel->dev = dev; - tunnel->net = dev_net(dev); - iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { - free_percpu(dev->tstats); - return -ENOMEM; - } - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); - return 0; } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net) */ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; - err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - if (err) - goto err_dev_free; - - ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + t = netdev_priv(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: - dev_put(sitn->fb_tunnel_dev); -err_dev_free: ipip6_dev_free(sitn->fb_tunnel_dev); err_alloc_dev: return err; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0909f4e0d53c..bb8f2fa1c7fb 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -114,14 +114,11 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, } EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); -__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); - tcp_synq_overflow(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT); - return __cookie_v6_init_sequence(iph, th, mssp); } @@ -173,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); + req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false); if (!req) goto out; @@ -210,7 +207,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->wscale_ok = tcp_opt.wscale_ok; 
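/* [Editor's note, not part of the patch] In the cookie_v6_check() hunk
 * continuing below, two related API moves are visible: the receive-window
 * bookkeeping migrates from TCP-specific members to the generic
 * request_sock (req->rsk_rcv_wnd, req->rsk_window_clamp), and
 * tcp_request_sock's snt_synack now appears as a struct skb_mstamp
 * (hence the .v64 clear), so the syncookie path zeroes it instead of
 * seeding it from the echoed timestamp option.
 */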
ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; - treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->snt_synack.v64 = 0; treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; @@ -238,9 +235,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out_free; } - req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); + req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); tcp_select_initial_window(tcp_full_space(sk), req->mss, - &req->rcv_wnd, &req->window_clamp, + &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 97d9314ea361..5baa8e754e41 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -70,8 +70,8 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); +static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); @@ -82,7 +82,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #else -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr) { return NULL; @@ -434,11 +434,11 @@ out: } -static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, +static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, - u16 queue_mapping, - struct tcp_fastopen_cookie *foc) + struct tcp_fastopen_cookie *foc, + bool attach_req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); @@ -447,10 +447,11 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, int err = -ENOMEM; /* First, grab a route. 
*/ - if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) + if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, + IPPROTO_TCP)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, foc); + skb = tcp_make_synack(sk, dst, req, foc, attach_req); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, @@ -460,7 +461,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); } @@ -476,13 +476,13 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req) } #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr) { return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6); } -static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, +static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); @@ -621,8 +621,12 @@ clear_hash_noput: return 1; } -static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) +#endif + +static bool tcp_v6_inbound_md5_hash(const struct sock *sk, + const struct sk_buff *skb) { +#ifdef CONFIG_TCP_MD5SIG const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -659,26 +663,27 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) &ip6h->daddr, ntohs(th->dest)); return true; } +#endif return false; } -#endif -static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, +static void tcp_v6_init_req(struct request_sock *req, + const struct sock *sk_listener, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); - struct ipv6_pinfo *np = inet6_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && + if (!sk_listener->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); if (!TCP_SKB_CB(skb)->tcp_tw_isn && - (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) || + (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || np->repflow)) { @@ -687,13 +692,14 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, } } -static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl, +static struct dst_entry *tcp_v6_route_req(const struct sock *sk, + struct flowi *fl, const struct request_sock *req, bool *strict) { if (strict) *strict = true; - return inet6_csk_route_req(sk, &fl->u.ip6, req); + return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { @@ -720,10 +726,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_sequence, .send_synack = tcp_v6_send_synack, - .queue_hash_add = inet6_csk_reqsk_queue_hash_add, }; -static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, +static void tcp_v6_send_response(const 
struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, u8 tclass, u32 label) @@ -822,7 +827,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, kfree_skb(buff); } -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; @@ -893,7 +898,7 @@ release_sk1: #endif } -static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq, +static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, u32 label) @@ -916,7 +921,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) inet_twsk_put(tw); } -static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, +static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV @@ -924,44 +929,18 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, - tcp_rsk(req)->rcv_nxt, req->rcv_wnd, + tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0, 0); } -static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) +static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) { +#ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); - struct request_sock *req; - struct sock *nsk; - - /* Find possible connection requests. 
*/ - req = inet6_csk_search_req(sk, th->source, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) { - nsk = tcp_check_req(sk, skb, req, false); - if (!nsk || nsk == sk) - reqsk_put(req); - return nsk; - } - nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), - tcp_v6_iif(skb)); - - if (nsk) { - if (nsk->sk_state != TCP_TIME_WAIT) { - bh_lock_sock(nsk); - return nsk; - } - inet_twsk_put(inet_twsk(nsk)); - return NULL; - } -#ifdef CONFIG_SYN_COOKIES if (!th->syn) sk = cookie_v6_check(sk, skb); #endif @@ -984,12 +963,15 @@ drop: return 0; /* don't send reset */ } -static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct dst_entry *dst) + struct dst_entry *dst, + struct request_sock *req_unhash, + bool *own_req) { struct inet_request_sock *ireq; - struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_pinfo *newnp; + const struct ipv6_pinfo *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; struct tcp_sock *newtp; @@ -1004,7 +986,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * v6 mapped */ - newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); + newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, + req_unhash, own_req); if (!newsk) return NULL; @@ -1057,7 +1040,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto out_overflow; if (!dst) { - dst = inet6_csk_route_req(sk, &fl6, req); + dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) goto out; } @@ -1090,8 +1073,6 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; - sk_set_txhash(newsk); - /* Now IPv6 options... First: no IPv4 options. @@ -1103,16 +1084,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; - /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_atomic(sk, GFP_ATOMIC)); - consume_skb(ireq->pktopts); - ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); - } newnp->opt = NULL; newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; @@ -1167,7 +1139,20 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_done(newsk); goto out; } - __inet_hash(newsk, NULL); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + if (*own_req) { + tcp_move_syn(newtp, req); + + /* Clone pktoptions received with SYN, if we own the req */ + if (ireq->pktopts) { + newnp->pktoptions = skb_clone(ireq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); + consume_skb(ireq->pktopts); + ireq->pktopts = NULL; + if (newnp->pktoptions) + skb_set_owner_r(newnp->pktoptions, newsk); + } + } return newsk; @@ -1181,7 +1166,7 @@ out: } /* The socket must have it's spinlock held when we get - * here. + * here, unless it is a TCP_LISTEN socket. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. 
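[Editor's note, not part of the patch] The netfilter hunks earlier in this diff all follow one pattern: hook functions drop the "const struct nf_hook_ops *ops" argument in favour of an opaque "void *priv", and take the hook number and network namespace from the hook state (state->hook, state->net) instead of ops->hooknum and dev_net() on a nearby device; the ".owner = THIS_MODULE" initializers disappear because hook registration no longer takes a per-hook module reference. A minimal sketch of a hook written against the new prototype, assuming a stand-alone out-of-tree module (the demo_* names and the debug message are invented for illustration):

#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

/* new-style nf_hookfn: an opaque priv pointer replaces the ops pointer */
static unsigned int demo_hook(void *priv, struct sk_buff *skb,
			      const struct nf_hook_state *state)
{
	/* hook number and netns now come from the state */
	pr_debug("demo: hook %u in netns %p\n", state->hook, state->net);
	return NF_ACCEPT;
}

static struct nf_hook_ops demo_ops = {
	.hook		= demo_hook,
	.pf		= NFPROTO_IPV6,
	.hooknum	= NF_INET_PRE_ROUTING,
	.priority	= NF_IP6_PRI_FIRST,
	/* no .owner field any more, matching the removals above */
};

static int __init demo_init(void)
{
	return nf_register_hook(&demo_ops);
}

static void __exit demo_exit(void)
{
	nf_unregister_hook(&demo_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Compare ipv6_defrag_ops near the top of this section: the initializer keeps the same shape, minus .owner.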
@@ -1252,18 +1237,14 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) goto csum_err; if (sk->sk_state == TCP_LISTEN) { - struct sock *nsk = tcp_v6_hnd_req(sk, skb); + struct sock *nsk = tcp_v6_cookie_check(sk, skb); + if (!nsk) goto discard; - /* - * Queue it on the new socket if the new socket is active, - * otherwise we just shortcircuit this and continue with - * the new socket.. - */ if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); - sk_mark_napi_id(sk, skb); + sk_mark_napi_id(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1273,7 +1254,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } else sock_rps_save_rxhash(sk, skb); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) + if (tcp_rcv_state_process(sk, skb)) goto reset; if (opt_skb) goto ipv6_pktoptions; @@ -1387,6 +1368,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) th = tcp_hdr(skb); hdr = ipv6_hdr(skb); +lookup: sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, inet6_iif(skb)); if (!sk) @@ -1396,6 +1378,37 @@ process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; + if (sk->sk_state == TCP_NEW_SYN_RECV) { + struct request_sock *req = inet_reqsk(sk); + struct sock *nsk = NULL; + + sk = req->rsk_listener; + tcp_v6_fill_cb(skb, hdr, th); + if (tcp_v6_inbound_md5_hash(sk, skb)) { + reqsk_put(req); + goto discard_it; + } + if (likely(sk->sk_state == TCP_LISTEN)) { + nsk = tcp_check_req(sk, skb, req, false); + } else { + inet_csk_reqsk_queue_drop_and_put(sk, req); + goto lookup; + } + if (!nsk) { + reqsk_put(req); + goto discard_it; + } + if (nsk == sk) { + sock_hold(sk); + reqsk_put(req); + tcp_v6_restore_cb(skb); + } else if (tcp_child_process(sk, nsk, skb)) { + tcp_v6_send_reset(nsk, skb); + goto discard_it; + } else { + return 0; + } + } if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); goto discard_and_relse; @@ -1406,17 +1419,21 @@ process: tcp_v6_fill_cb(skb, hdr, th); -#ifdef CONFIG_TCP_MD5SIG if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard_and_relse; -#endif if (sk_filter(sk, skb)) goto discard_and_relse; - sk_incoming_cpu_update(sk); skb->dev = NULL; + if (sk->sk_state == TCP_LISTEN) { + ret = tcp_v6_do_rcv(sk, skb); + goto put_and_return; + } + + sk_incoming_cpu_update(sk); + bh_lock_sock_nested(sk); tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; @@ -1431,6 +1448,7 @@ process: } bh_unlock_sock(sk); +put_and_return: sock_put(sk); return ret ? -1 : 0; @@ -1631,7 +1649,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. 
*/ static void get_openreq6(struct seq_file *seq, - struct request_sock *req, int i, kuid_t uid) + const struct request_sock *req, int i) { long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; @@ -1655,7 +1673,8 @@ static void get_openreq6(struct seq_file *seq, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, - from_kuid_munged(seq_user_ns(seq), uid), + from_kuid_munged(seq_user_ns(seq), + sock_i_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -1670,7 +1689,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; + const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1714,7 +1733,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, sp->sk_state == TCP_LISTEN ? - (fastopenq ? fastopenq->max_qlen : 0) : + fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } @@ -1760,18 +1779,12 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) } st = seq->private; - switch (st->state) { - case TCP_SEQ_STATE_LISTENING: - case TCP_SEQ_STATE_ESTABLISHED: - if (sk->sk_state == TCP_TIME_WAIT) - get_timewait6_sock(seq, v, st->num); - else - get_tcp6_sock(seq, v, st->num); - break; - case TCP_SEQ_STATE_OPENREQ: - get_openreq6(seq, v, st->num, st->uid); - break; - } + if (sk->sk_state == TCP_TIME_WAIT) + get_timewait6_sock(seq, v, st->num); + else if (sk->sk_state == TCP_NEW_SYN_RECV) + get_openreq6(seq, v, st->num); + else + get_tcp6_sock(seq, v, st->num); out: return 0; } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 3c758007b327..dae25cad05cd 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -144,6 +144,16 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, break; } +static void tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_tunnel *handler; + + for_each_tunnel_rcu(tunnel46_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + static const struct inet6_protocol tunnel6_protocol = { .handler = tunnel6_rcv, .err_handler = tunnel6_err, @@ -152,7 +162,7 @@ static const struct inet6_protocol tunnel6_protocol = { static const struct inet6_protocol tunnel46_protocol = { .handler = tunnel46_rcv, - .err_handler = tunnel6_err, + .err_handler = tunnel46_err, .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0aba654f5b91..01bcb49619ee 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -182,10 +182,12 @@ static inline int compute_score(struct sock *sk, struct net *net, score++; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; + return score; } -#define SCORE2_MAX (1 + 1 + 1) static inline int compute_score2(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, @@ -223,6 +225,9 @@ static inline int compute_score2(struct sock *sk, struct net *net, score++; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) + score++; + return score; } @@ -251,8 +256,7 @@ 
begin: hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); matches = 1; - } else if (score == SCORE2_MAX) - goto exact_match; + } } else if (score == badness && reuseport) { matches++; if (reciprocal_scale(hash, matches) == 0) @@ -269,7 +273,6 @@ begin: goto begin; if (result) { -exact_match: if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 74bd17882a2f..0eaab1fa6be5 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,8 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, - skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, + dev_net(skb->dev), NULL, skb, skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index e15feb7b413d..4d09ce6fa90e 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -132,7 +132,14 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) +static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct xfrm_state *x = skb_dst(skb)->xfrm; + + return x->outer_mode->afinfo->output_finish(sk, skb); +} + +static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; @@ -142,7 +149,7 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output_sk(sk, skb); + return dst_output(net, sk, skb); } #endif @@ -165,16 +172,17 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) } if (toobig || dst_allfrag(skb_dst(skb))) - return ip6_fragment(sk, skb, - x->outer_mode->afinfo->output_finish); + return ip6_fragment(net, sk, skb, + __xfrm6_output_finish); skip_frag: return x->outer_mode->afinfo->output_finish(sk, skb); } -int xfrm6_output(struct sock *sk, struct sk_buff *skb) +int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, - NULL, skb_dst(skb)->dev, __xfrm6_output, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, + net, sk, skb, NULL, skb_dst(skb)->dev, + __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index da55e0c85bb8..5643423fe67a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -20,7 +20,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> -#include <net/vrf.h> +#include <net/l3mdev.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif @@ -133,10 +133,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) { - oif = vrf_master_ifindex(skb_dst(skb)->dev) ? 
- : skb_dst(skb)->dev->ifindex; - } + if (skb_dst(skb)) + oif = l3mdev_fib_oif(skb_dst(skb)->dev); memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; @@ -290,7 +288,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 32768, + .gc_thresh = INT_MAX, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index fae6822cc367..e6aa48b5395c 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2123,8 +2123,7 @@ static int irda_setsockopt(struct socket *sock, int level, int optname, } /* Unregister any old registration */ - if (self->skey) - irlmp_unregister_service(self->skey); + irlmp_unregister_service(self->skey); self->skey = irlmp_register_service((__u16) opt); break; diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 683346d2d633..a4237707f79d 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -335,8 +335,7 @@ static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, * specified, we cannot return before the IrCOMM link is * ready */ - if (!test_bit(ASYNCB_CLOSING, &port->flags) && - (do_clocal || tty_port_carrier_raised(port)) && + if ((do_clocal || tty_port_carrier_raised(port)) && self->state == IRCOMM_TTY_READY) { break; @@ -443,34 +442,6 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) /* Not really used by us, but lets do it anyway */ self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; - /* - * If the port is the middle of closing, bail out now - */ - if (test_bit(ASYNCB_CLOSING, &self->port.flags)) { - - /* Hm, why are we blocking on ASYNC_CLOSING if we - * do return -EAGAIN/-ERESTARTSYS below anyway? - * IMHO it's either not needed in the first place - * or for some reason we need to make sure the async - * closing has been finished - if so, wouldn't we - * probably better sleep uninterruptible? - */ - - if (wait_event_interruptible(self->port.close_wait, - !test_bit(ASYNCB_CLOSING, &self->port.flags))) { - net_warn_ratelimited("%s - got signal while blocking on ASYNC_CLOSING!\n", - __func__); - return -ERESTARTSYS; - } - -#ifdef SERIAL_DO_RESTART - return (self->port.flags & ASYNC_HUP_NOTIFY) ? 
- -EAGAIN : -ERESTARTSYS; -#else - return -EAGAIN; -#endif - } - /* Check if this is a "normal" ircomm device, or an irlpt device */ if (self->line < 0x10) { self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 918151c11348..fcb2752419c6 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -95,11 +95,10 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify); /* Call Back functions */ static void iucv_callback_rx(struct iucv_path *, struct iucv_message *); static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *); -static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]); -static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8], - u8 ipuser[16]); -static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]); -static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]); +static void iucv_callback_connack(struct iucv_path *, u8 *); +static int iucv_callback_connreq(struct iucv_path *, u8 *, u8 *); +static void iucv_callback_connrej(struct iucv_path *, u8 *); +static void iucv_callback_shutdown(struct iucv_path *, u8 *); static struct iucv_sock_list iucv_sk_list = { .lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock), diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 2a6a1fdd62c0..7eaa000c9258 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -713,7 +713,7 @@ static struct notifier_block __refdata iucv_cpu_notifier = { * * Sever an iucv path to free up the pathid. Used internally. */ -static int iucv_sever_pathid(u16 pathid, u8 userdata[16]) +static int iucv_sever_pathid(u16 pathid, u8 *userdata) { union iucv_param *parm; @@ -876,7 +876,7 @@ static struct notifier_block iucv_reboot_notifier = { * Returns the result of the CP IUCV call. */ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, - u8 userdata[16], void *private) + u8 *userdata, void *private) { union iucv_param *parm; int rc; @@ -923,7 +923,7 @@ EXPORT_SYMBOL(iucv_path_accept); * Returns the result of the CP IUCV call. */ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, - u8 userid[8], u8 system[8], u8 userdata[16], + u8 *userid, u8 *system, u8 *userdata, void *private) { union iucv_param *parm; @@ -985,7 +985,7 @@ EXPORT_SYMBOL(iucv_path_connect); * * Returns the result from the CP IUCV call. */ -int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16]) +int iucv_path_quiesce(struct iucv_path *path, u8 *userdata) { union iucv_param *parm; int rc; @@ -1017,7 +1017,7 @@ EXPORT_SYMBOL(iucv_path_quiesce); * * Returns the result from the CP IUCV call. */ -int iucv_path_resume(struct iucv_path *path, u8 userdata[16]) +int iucv_path_resume(struct iucv_path *path, u8 *userdata) { union iucv_param *parm; int rc; @@ -1047,7 +1047,7 @@ out: * * Returns the result from the CP IUCV call. */ -int iucv_path_sever(struct iucv_path *path, u8 userdata[16]) +int iucv_path_sever(struct iucv_path *path, u8 *userdata) { int rc; diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 68aa9ffd4ae4..5871537af387 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -321,4 +321,7 @@ do { \ #define l2tp_dbg(ptr, type, fmt, ...) 
\ l2tp_printk(ptr, type, pr_debug, fmt, ##__VA_ARGS__) +#define MODULE_ALIAS_L2TP_PWTYPE(type) \ + MODULE_ALIAS("net-l2tp-type-" __stringify(type)) + #endif /* _L2TP_CORE_H_ */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 4b552873b556..e253c26f31ac 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -358,3 +358,4 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP ethernet pseudowire driver"); MODULE_VERSION("1.0"); +MODULE_ALIAS_L2TP_PWTYPE(5); diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 79649937ec71..ec22078b0914 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -655,3 +655,4 @@ MODULE_VERSION("1.0"); * enums */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d1ded3777815..aca38d8aed8e 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -801,3 +801,4 @@ MODULE_VERSION("1.0"); * enums */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP); +MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 9e13c2ff8789..f93c5be612a7 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -576,6 +576,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); +#ifdef CONFIG_MODULES + if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) { + genl_unlock(); + request_module("net-l2tp-type-%u", cfg.pw_type); + genl_lock(); + } +#endif if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { ret = -EPROTONOSUPPORT; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f56c9f69e9f2..1ad18c55064c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1863,3 +1863,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP"); MODULE_LICENSE("GPL"); MODULE_VERSION(PPPOL2TP_DRV_VERSION); MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP)); +MODULE_ALIAS_L2TP_PWTYPE(11); diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig new file mode 100644 index 000000000000..5d47325037bc --- /dev/null +++ b/net/l3mdev/Kconfig @@ -0,0 +1,10 @@ +# +# Configuration for L3 master device support +# + +config NET_L3_MASTER_DEV + bool "L3 Master device support" + depends on INET || IPV6 + ---help--- + This module provides glue between core networking code and device + drivers to support L3 master devices like VRF. diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile new file mode 100644 index 000000000000..84a53a6f609a --- /dev/null +++ b/net/l3mdev/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the L3 device API +# + +obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c new file mode 100644 index 000000000000..8e5ead366e7f --- /dev/null +++ b/net/l3mdev/l3mdev.c @@ -0,0 +1,92 @@ +/* + * net/l3mdev/l3mdev.c - L3 master device implementation + * Copyright (c) 2015 Cumulus Networks + * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/netdevice.h> +#include <net/l3mdev.h> + +/** + * l3mdev_master_ifindex_rcu - get index of L3 master device + * @dev: targeted interface + */ + +int l3mdev_master_ifindex_rcu(struct net_device *dev) +{ + int ifindex = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + ifindex = dev->ifindex; + } else if (netif_is_l3_slave(dev)) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master) + ifindex = master->ifindex; + } + + return ifindex; +} +EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu); + +/** + * l3mdev_fib_table_rcu - get FIB table id associated with an L3 + * master interface + * @dev: targeted interface + */ + +u32 l3mdev_fib_table_rcu(const struct net_device *dev) +{ + u32 tb_id = 0; + + if (!dev) + return 0; + + if (netif_is_l3_master(dev)) { + if (dev->l3mdev_ops->l3mdev_fib_table) + tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev); + } else if (netif_is_l3_slave(dev)) { + /* Users of netdev_master_upper_dev_get_rcu need non-const, + * but current inet_*type functions take a const + */ + struct net_device *_dev = (struct net_device *) dev; + const struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(_dev); + if (master && + master->l3mdev_ops->l3mdev_fib_table) + tb_id = master->l3mdev_ops->l3mdev_fib_table(master); + } + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu); + +u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) +{ + struct net_device *dev; + u32 tb_id = 0; + + if (!ifindex) + return 0; + + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, ifindex); + if (dev) + tb_id = l3mdev_fib_table_rcu(dev); + + rcu_read_unlock(); + + return tb_id; +} +EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 783e891b7525..f9137a8341f4 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -27,7 +27,6 @@ mac80211-y := \ key.o \ util.o \ wme.o \ - event.o \ chan.o \ trace.o mlme.o \ tdls.o \ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 5c564a68fb50..10ad4ac1fa0b 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -79,7 +79,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, (int)reason); if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL, 0)) + &sta->sta, tid, NULL, 0, false)) sdata_info(sta->sdata, "HW problem - can not stop rx aggregation for %pM tid %d\n", sta->sta.addr, tid); @@ -189,6 +189,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; + bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU); u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); @@ -217,7 +218,8 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; mgmt->u.action.u.addba_resp.dialog_token = dialog_token; - capab = (u16)(policy << 1); /* bit 1 aggregation policy */ + capab = (u16)(amsdu << 0); /* bit 0 A-MSDU support */ + capab |= (u16)(policy << 1); /* bit 1 aggregation policy */ capab |= (u16)(tid << 2); /* bit 5:2 TID number */ capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ @@ -321,7 +323,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, __skb_queue_head_init(&tid_agg_rx->reorder_buf[i]); ret = drv_ampdu_action(local, sta->sdata,
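
The new l3mdev core above is deliberately small: resolve a device (or an ifindex) to its L3 master and ask the master's l3mdev_ops for the FIB table to use. A minimal caller sketch, assuming the usual RCU rules for the _rcu variants (the helper name is made up for illustration):

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/l3mdev.h>

/* Hypothetical: pick the FIB table for a lookup triggered by an
 * incoming packet. Returns 0 when the device is not enslaved to an
 * L3 master, i.e. no table override applies.
 */
static u32 example_fib_table_for_skb(const struct sk_buff *skb)
{
	u32 tb_id;

	rcu_read_lock();	/* l3mdev_fib_table_rcu() requires it */
	tb_id = l3mdev_fib_table_rcu(skb->dev);
	rcu_read_unlock();

	return tb_id;
}

l3mdev_fib_table_by_index() wraps exactly this pattern for callers that only hold an ifindex.
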
IEEE80211_AMPDU_RX_START, - &sta->sta, tid, &start_seq_num, 0); + &sta->sta, tid, &start_seq_num, 0, false); ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n", sta->sta.addr, tid, ret); if (ret) { diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c8ba2e77737c..a758eb84e8f0 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -97,7 +97,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; mgmt->u.action.u.addba_req.dialog_token = dialog_token; - capab = (u16)(1 << 1); /* bit 1 aggregation policy */ + capab = (u16)(1 << 0); /* bit 0 A-MSDU support */ + capab |= (u16)(1 << 1); /* bit 1 aggregation policy */ capab |= (u16)(tid << 2); /* bit 5:2 TID number */ capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggregation */ @@ -331,7 +332,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, return -EALREADY; ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_STOP_FLUSH_CONT, - &sta->sta, tid, NULL, 0); + &sta->sta, tid, NULL, 0, false); WARN_ON_ONCE(ret); return 0; } @@ -381,7 +382,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST; ret = drv_ampdu_action(local, sta->sdata, action, - &sta->sta, tid, NULL, 0); + &sta->sta, tid, NULL, 0, false); /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { @@ -469,7 +470,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) start_seq_num = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num, 0); + &sta->sta, tid, &start_seq_num, 0, false); if (ret) { ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", @@ -693,7 +694,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_TX_OPERATIONAL, - &sta->sta, tid, NULL, tid_tx->buf_size); + &sta->sta, tid, NULL, tid_tx->buf_size, + tid_tx->amsdu); /* * synchronize with TX path, while splicing the TX path @@ -918,8 +920,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, struct tid_ampdu_tx *tid_tx; u16 capab, tid; u8 buf_size; + bool amsdu; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; @@ -968,6 +972,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, } tid_tx->buf_size = buf_size; + tid_tx->amsdu = amsdu; if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)) ieee80211_agg_tx_operational(local, sta, tid); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7a77a1470f25..c2bd1b6a6922 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,7 +17,6 @@ #include <net/cfg80211.h> #include "ieee80211_i.h" #include "driver-ops.h" -#include "cfg.h" #include "rate.h" #include "mesh.h" #include "wme.h" @@ -469,45 +468,6 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } -void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) -{ - rinfo->flags = 0; - - if (sta->last_rx_rate_flag & RX_FLAG_HT) { - rinfo->flags |= RATE_INFO_FLAGS_MCS; - rinfo->mcs = sta->last_rx_rate_idx; - } else if (sta->last_rx_rate_flag & RX_FLAG_VHT) { - rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; - rinfo->nss = sta->last_rx_rate_vht_nss; -
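
The agg-rx.c and agg-tx.c hunks above pack and parse the same ADDBA capability field; the new bit 0 carries A-MSDU-in-A-MPDU support. A round-trip sketch of the layout (the hex masks mirror the IEEE80211_ADDBA_PARAM_* constants used in ieee80211_process_addba_resp() above):

#include <linux/types.h>

static u16 addba_capab_pack(bool amsdu, bool policy, u8 tid, u16 buf_size)
{
	u16 capab;

	capab  = (u16)(amsdu << 0);    /* bit 0: A-MSDU support */
	capab |= (u16)(policy << 1);   /* bit 1: aggregation (BA) policy */
	capab |= (u16)(tid << 2);      /* bits 5:2: TID */
	capab |= (u16)(buf_size << 6); /* bits 15:6: reorder buffer size */
	return capab;
}

static void addba_capab_unpack(u16 capab, bool *amsdu, u8 *tid, u16 *buf_size)
{
	*amsdu = capab & 0x0001;           /* IEEE80211_ADDBA_PARAM_AMSDU_MASK */
	*tid = (capab & 0x003c) >> 2;      /* IEEE80211_ADDBA_PARAM_TID_MASK */
	*buf_size = (capab & 0xffc0) >> 6; /* ..._BUF_SIZE_MASK */
}
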
rinfo->mcs = sta->last_rx_rate_idx; - } else { - struct ieee80211_supported_band *sband; - int shift = ieee80211_vif_get_shift(&sta->sdata->vif); - u16 brate; - - sband = sta->local->hw.wiphy->bands[ - ieee80211_get_sdata_band(sta->sdata)]; - brate = sband->bitrates[sta->last_rx_rate_idx].bitrate; - rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); - } - - if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) - rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; - - if (sta->last_rx_rate_flag & RX_FLAG_5MHZ) - rinfo->bw = RATE_INFO_BW_5; - else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ) - rinfo->bw = RATE_INFO_BW_10; - else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) - rinfo->bw = RATE_INFO_BW_40; - else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->bw = RATE_INFO_BW_80; - else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->bw = RATE_INFO_BW_160; - else - rinfo->bw = RATE_INFO_BW_20; -} - static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *mac, struct station_info *sinfo) { @@ -981,7 +941,7 @@ static int sta_apply_auth_flags(struct ieee80211_local *local, * well. Some drivers require rate control initialized * before drv_sta_state() is called. */ - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) rate_control_rate_init(sta); ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC); @@ -1120,8 +1080,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, local->hw.queues >= IEEE80211_NUM_ACS) sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME); - /* auth flags will be set later for TDLS stations */ - if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + /* auth flags will be set later for TDLS, + * and for unassociated stations that move to associated */ + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) && + (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; @@ -1135,6 +1098,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, } if (mask & BIT(NL80211_STA_FLAG_MFP)) { + sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP)); if (set & BIT(NL80211_STA_FLAG_MFP)) set_sta_flag(sta, WLAN_STA_MFP); else @@ -1156,6 +1120,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + !sdata->u.mgd.tdls_wider_bw_prohibited && ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && params->ext_capab_len >= 8 && params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED) @@ -1212,7 +1177,8 @@ static int sta_apply_parameters(struct ieee80211_local *local, sta_apply_mesh_params(local, sta, params); /* set the STA state after all sta info from usermode has been set */ - if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || + set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { ret = sta_apply_auth_flags(local, sta, mask, set); if (ret) return ret; @@ -1254,12 +1220,14 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, * defaults -- if userspace wants something else we'll * change it accordingly in sta_apply_parameters() */ - if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { + if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) && + !(params->sta_flags_set & (BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED)))) { sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); sta_info_pre_move_state(sta,
IEEE80211_STA_ASSOC); - } else { - sta->sta.tdls = true; } + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + sta->sta.tdls = true; err = sta_apply_parameters(local, sta, params); if (err) { @@ -1268,10 +1236,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, } /* - * for TDLS, rate control should be initialized only when - * rates are known and station is marked authorized + * for TDLS and for unassociated station, rate control should be + * initialized only when rates are known and station is marked + * authorized/associated */ - if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) + if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || @@ -1346,7 +1316,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: - statype = CFG80211_STA_AP_CLIENT; + if (test_sta_flag(sta, WLAN_STA_ASSOC)) + statype = CFG80211_STA_AP_CLIENT; + else + statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; default: err = -EOPNOTSUPP; @@ -1415,7 +1388,7 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (sdata->vif.type == NL80211_IFTYPE_STATION && params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)) { - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); } @@ -2037,12 +2010,12 @@ ieee80211_sched_scan_start(struct wiphy *wiphy, static int ieee80211_sched_scan_stop(struct wiphy *wiphy, struct net_device *dev) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wiphy_priv(wiphy); - if (!sdata->local->ops->sched_scan_stop) + if (!local->ops->sched_scan_stop) return -EOPNOTSUPP; - return ieee80211_request_sched_scan_stop(sdata); + return ieee80211_request_sched_scan_stop(local); } static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, @@ -2450,7 +2423,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, if (ieee80211_hw_check(&local->hw, SUPPORTS_DYNAMIC_PS)) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); ieee80211_recalc_ps_vif(sdata); return 0; @@ -3522,18 +3495,32 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, u16 frame_type, bool reg) { struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); switch (frame_type) { case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: - if (reg) + if (reg) { local->probe_req_reg++; - else - local->probe_req_reg--; + sdata->vif.probe_req_reg++; + } else { + if (local->probe_req_reg) + local->probe_req_reg--; + + if (sdata->vif.probe_req_reg) + sdata->vif.probe_req_reg--; + } if (!local->open_count) break; - ieee80211_queue_work(&local->hw, &local->reconfig_filter); + if (sdata->vif.probe_req_reg == 1) + drv_config_iface_filter(local, sdata, FIF_PROBE_REQ, + FIF_PROBE_REQ); + else if (sdata->vif.probe_req_reg == 0) + drv_config_iface_filter(local, sdata, 0, + FIF_PROBE_REQ); + + ieee80211_configure_filter(local); break; default: break; diff --git a/net/mac80211/cfg.h b/net/mac80211/cfg.h deleted file mode 100644 index 2d51f62dc76c..000000000000 --- a/net/mac80211/cfg.h +++ /dev/null @@ -1,9 +0,0 @@ -/* - * mac80211 configuration hooks for cfg80211 - */ -#ifndef __CFG_H -#define __CFG_H - -extern const struct cfg80211_ops mac80211_config_ops; - -#endif 
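
Together with the CFG80211_STA_AP_CLIENT_UNASSOC case above, this lets userspace create an AP client in a pre-associated state and promote it later; sta_apply_auth_flags() then has to walk the station state machine one step at a time. A sketch of that ladder, built from the sta_info_move_state() calls visible in this patch (the helper itself is illustrative, not part of the patch; it uses mac80211-internal symbols from sta_info.h):

/* NONE -> AUTH -> ASSOC -> AUTHORIZED; each accepted step may be
 * mirrored to the driver through drv_sta_state().
 */
static int example_promote_to_assoc(struct sta_info *sta)
{
	int ret;

	if (sta->sta_state < IEEE80211_STA_AUTH) {
		ret = sta_info_move_state(sta, IEEE80211_STA_AUTH);
		if (ret)
			return ret;
	}

	return sta_info_move_state(sta, IEEE80211_STA_ASSOC);
}
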
/* __CFG_H */ diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 1560c8482bcb..4d2aaebd4f97 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -123,6 +123,8 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { FLAG(SUPPORTS_CLONED_SKBS), FLAG(SINGLE_SCAN_ON_ALL_BANDS), FLAG(TDLS_WIDER_BW), + FLAG(SUPPORTS_AMSDU_IN_AMPDU), + FLAG(BEACON_TX_STATUS), /* keep last for the build bug below */ (void *)0x1 diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 702ca122c498..7961e7d0b61e 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -2,6 +2,7 @@ * Copyright 2003-2005 Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -34,6 +35,14 @@ static const struct file_operations key_ ##name## _ops = { \ .llseek = generic_file_llseek, \ } +#define KEY_OPS_W(name) \ +static const struct file_operations key_ ##name## _ops = { \ + .read = key_##name##_read, \ + .write = key_##name##_write, \ + .open = simple_open, \ + .llseek = generic_file_llseek, \ +} + #define KEY_FILE(name, format) \ KEY_READ_##format(name) \ KEY_OPS(name) @@ -74,6 +83,41 @@ static ssize_t key_algorithm_read(struct file *file, } KEY_OPS(algorithm); +static ssize_t key_tx_spec_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct ieee80211_key *key = file->private_data; + u64 pn; + int ret; + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + return -EINVAL; + case WLAN_CIPHER_SUITE_TKIP: + /* not supported yet */ + return -EOPNOTSUPP; + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + ret = kstrtou64_from_user(userbuf, count, 16, &pn); + if (ret) + return ret; + /* PN is a 48-bit counter */ + if (pn >= (1ULL << 48)) + return -ERANGE; + atomic64_set(&key->conf.tx_pn, pn); + return count; + default: + return 0; + } +} + static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -110,7 +154,7 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, } return simple_read_from_buffer(userbuf, count, ppos, buf, len); } -KEY_OPS(tx_spec); +KEY_OPS_W(tx_spec); static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -278,6 +322,9 @@ KEY_OPS(key); #define DEBUGFS_ADD(name) \ debugfs_create_file(#name, 0400, key->debugfs.dir, \ key, &key_##name##_ops); +#define DEBUGFS_ADD_W(name) \ + debugfs_create_file(#name, 0600, key->debugfs.dir, \ + key, &key_##name##_ops); void ieee80211_debugfs_key_add(struct ieee80211_key *key) { @@ -310,7 +357,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) DEBUGFS_ADD(keyidx); DEBUGFS_ADD(hw_key_idx); DEBUGFS_ADD(algorithm); - DEBUGFS_ADD(tx_spec); + DEBUGFS_ADD_W(tx_spec); DEBUGFS_ADD(rx_spec); DEBUGFS_ADD(replays); DEBUGFS_ADD(icverrors); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 1021e87c051f..37ea30e0754c 100644 --- a/net/mac80211/debugfs_netdev.c +++ 
b/net/mac80211/debugfs_netdev.c @@ -114,14 +114,6 @@ static ssize_t ieee80211_if_fmt_##name( \ return scnprintf(buf, buflen, "%pM\n", sdata->field); \ } -#define IEEE80211_IF_FMT_DEC_DIV_16(name, field) \ -static ssize_t ieee80211_if_fmt_##name( \ - const struct ieee80211_sub_if_data *sdata, \ - char *buf, int buflen) \ -{ \ - return scnprintf(buf, buflen, "%d\n", sdata->field / 16); \ -} - #define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \ static ssize_t ieee80211_if_fmt_##name( \ const struct ieee80211_sub_if_data *sdata, \ @@ -247,8 +239,6 @@ IEEE80211_IF_FILE_R(hw_queues); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); -IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC); -IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16); IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS); static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, @@ -455,6 +445,34 @@ static ssize_t ieee80211_if_parse_uapsd_max_sp_len( } IEEE80211_IF_FILE_RW(uapsd_max_sp_len); +static ssize_t ieee80211_if_fmt_tdls_wider_bw( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + const struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + bool tdls_wider_bw; + + tdls_wider_bw = ieee80211_hw_check(&sdata->local->hw, TDLS_WIDER_BW) && + !ifmgd->tdls_wider_bw_prohibited; + + return snprintf(buf, buflen, "%d\n", tdls_wider_bw); +} + +static ssize_t ieee80211_if_parse_tdls_wider_bw( + struct ieee80211_sub_if_data *sdata, const char *buf, int buflen) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + u8 val; + int ret; + + ret = kstrtou8(buf, 0, &val); + if (ret) + return ret; + + ifmgd->tdls_wider_bw_prohibited = !val; + return buflen; +} +IEEE80211_IF_FILE_RW(tdls_wider_bw); + /* AP attributes */ IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC); IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC); @@ -606,14 +624,13 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(bssid); DEBUGFS_ADD(aid); - DEBUGFS_ADD(last_beacon); - DEBUGFS_ADD(ave_beacon); DEBUGFS_ADD(beacon_timeout); DEBUGFS_ADD_MODE(smps, 0600); DEBUGFS_ADD_MODE(tkip_mic_test, 0200); DEBUGFS_ADD_MODE(beacon_loss, 0200); DEBUGFS_ADD_MODE(uapsd_queues, 0600); DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600); + DEBUGFS_ADD_MODE(tdls_wider_bw, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 06d52935036d..a39512f09f9e 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -50,7 +50,6 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_OPS(name) STA_FILE(aid, sta.aid, D); -STA_FILE(last_ack_signal, last_ack_signal, D); static ssize_t sta_flags_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) @@ -366,11 +365,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD(agg_status); DEBUGFS_ADD(ht_capa); DEBUGFS_ADD(vht_capa); - DEBUGFS_ADD(last_ack_signal); - DEBUGFS_ADD_COUNTER(rx_duplicates, num_duplicates); - DEBUGFS_ADD_COUNTER(rx_fragments, rx_fragments); - DEBUGFS_ADD_COUNTER(tx_filtered, tx_filtered_count); + DEBUGFS_ADD_COUNTER(rx_duplicates, rx_stats.num_duplicates); + DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); + DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, diff --git a/net/mac80211/driver-ops.c 
b/net/mac80211/driver-ops.c index 267c3b1ca047..ca1fe5576103 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -1,4 +1,6 @@ /* + * Copyright 2015 Intel Deutschland GmbH + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. @@ -8,6 +10,102 @@ #include "trace.h" #include "driver-ops.h" +int drv_start(struct ieee80211_local *local) +{ + int ret; + + might_sleep(); + + if (WARN_ON(local->started)) + return -EALREADY; + + trace_drv_start(local); + local->started = true; + /* allow rx frames */ + smp_mb(); + ret = local->ops->start(&local->hw); + trace_drv_return_int(local, ret); + + if (ret) + local->started = false; + + return ret; +} + +void drv_stop(struct ieee80211_local *local) +{ + might_sleep(); + + if (WARN_ON(!local->started)) + return; + + trace_drv_stop(local); + local->ops->stop(&local->hw); + trace_drv_return_void(local); + + /* sync away all work on the tasklet before clearing started */ + tasklet_disable(&local->tasklet); + tasklet_enable(&local->tasklet); + + barrier(); + + local->started = false; +} + +int drv_add_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + int ret; + + might_sleep(); + + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) + return -EINVAL; + + trace_drv_add_interface(local, sdata); + ret = local->ops->add_interface(&local->hw, &sdata->vif); + trace_drv_return_int(local, ret); + + if (ret == 0) + sdata->flags |= IEEE80211_SDATA_IN_DRIVER; + + return ret; +} + +int drv_change_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type, bool p2p) +{ + int ret; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + trace_drv_change_interface(local, sdata, type, p2p); + ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); + trace_drv_return_int(local, ret); + return ret; +} + +void drv_remove_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_remove_interface(local, sdata); + local->ops->remove_interface(&local->hw, &sdata->vif); + sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; + trace_drv_return_void(local); +} + __must_check int drv_sta_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -39,3 +137,173 @@ int drv_sta_state(struct ieee80211_local *local, trace_drv_return_int(local, ret); return ret; } + +void drv_sta_rc_update(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, u32 changed) +{ + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && + (sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); + + trace_drv_sta_rc_update(local, sdata, sta, changed); + if (local->ops->sta_rc_update) + local->ops->sta_rc_update(&local->hw, &sdata->vif, + sta, changed); + + trace_drv_return_void(local); +} + +int drv_conf_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u16 ac, + const struct ieee80211_tx_queue_params *params) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + 
return -EIO; + + if (WARN_ONCE(params->cw_min == 0 || + params->cw_min > params->cw_max, + "%s: invalid CW_min/CW_max: %d/%d\n", + sdata->name, params->cw_min, params->cw_max)) + return -EINVAL; + + trace_drv_conf_tx(local, sdata, ac, params); + if (local->ops->conf_tx) + ret = local->ops->conf_tx(&local->hw, &sdata->vif, + ac, params); + trace_drv_return_int(local, ret); + return ret; +} + +u64 drv_get_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + u64 ret = -1ULL; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return ret; + + trace_drv_get_tsf(local, sdata); + if (local->ops->get_tsf) + ret = local->ops->get_tsf(&local->hw, &sdata->vif); + trace_drv_return_u64(local, ret); + return ret; +} + +void drv_set_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 tsf) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_set_tsf(local, sdata, tsf); + if (local->ops->set_tsf) + local->ops->set_tsf(&local->hw, &sdata->vif, tsf); + trace_drv_return_void(local); +} + +void drv_reset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_reset_tsf(local, sdata); + if (local->ops->reset_tsf) + local->ops->reset_tsf(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + +int drv_switch_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, enum ieee80211_chanctx_switch_mode mode) +{ + int ret = 0; + int i; + + might_sleep(); + + if (!local->ops->switch_vif_chanctx) + return -EOPNOTSUPP; + + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + WARN_ON_ONCE(!old_ctx->driver_present); + WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS && + new_ctx->driver_present) || + (mode == CHANCTX_SWMODE_REASSIGN_VIF && + !new_ctx->driver_present)); + } + + trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode); + ret = local->ops->switch_vif_chanctx(&local->hw, + vifs, n_vifs, mode); + trace_drv_return_int(local, ret); + + if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) { + for (i = 0; i < n_vifs; i++) { + struct ieee80211_chanctx *new_ctx = + container_of(vifs[i].new_ctx, + struct ieee80211_chanctx, + conf); + struct ieee80211_chanctx *old_ctx = + container_of(vifs[i].old_ctx, + struct ieee80211_chanctx, + conf); + + new_ctx->driver_present = true; + old_ctx->driver_present = false; + } + } + + return ret; +} + +int drv_ampdu_action(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_ampdu_mlme_action action, + struct ieee80211_sta *sta, u16 tid, + u16 *ssn, u8 buf_size, bool amsdu) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + trace_drv_ampdu_action(local, sdata, action, sta, tid, + ssn, buf_size, amsdu); + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, + sta, tid, ssn, buf_size, amsdu); + + trace_drv_return_int(local, ret); + + return ret; +} diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 02d91332d7dd..154ce4b13406 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -66,36 +66,8 @@ static inline int drv_get_et_sset_count(struct 
ieee80211_sub_if_data *sdata, return rv; } -static inline int drv_start(struct ieee80211_local *local) -{ - int ret; - - might_sleep(); - - trace_drv_start(local); - local->started = true; - smp_mb(); - ret = local->ops->start(&local->hw); - trace_drv_return_int(local, ret); - return ret; -} - -static inline void drv_stop(struct ieee80211_local *local) -{ - might_sleep(); - - trace_drv_stop(local); - local->ops->stop(&local->hw); - trace_drv_return_void(local); - - /* sync away all work on the tasklet before clearing started */ - tasklet_disable(&local->tasklet); - tasklet_enable(&local->tasklet); - - barrier(); - - local->started = false; -} +int drv_start(struct ieee80211_local *local); +void drv_stop(struct ieee80211_local *local); #ifdef CONFIG_PM static inline int drv_suspend(struct ieee80211_local *local, @@ -137,59 +109,15 @@ static inline void drv_set_wakeup(struct ieee80211_local *local, } #endif -static inline int drv_add_interface(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - int ret; - - might_sleep(); - - if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) - return -EINVAL; - - trace_drv_add_interface(local, sdata); - ret = local->ops->add_interface(&local->hw, &sdata->vif); - trace_drv_return_int(local, ret); - - if (ret == 0) - sdata->flags |= IEEE80211_SDATA_IN_DRIVER; - - return ret; -} - -static inline int drv_change_interface(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - enum nl80211_iftype type, bool p2p) -{ - int ret; - - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_change_interface(local, sdata, type, p2p); - ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); - trace_drv_return_int(local, ret); - return ret; -} +int drv_add_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); -static inline void drv_remove_interface(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); +int drv_change_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type, bool p2p); - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_remove_interface(local, sdata); - local->ops->remove_interface(&local->hw, &sdata->vif); - sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER; - trace_drv_return_void(local); -} +void drv_remove_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); static inline int drv_config(struct ieee80211_local *local, u32 changed) { @@ -260,6 +188,22 @@ static inline void drv_configure_filter(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_config_iface_filter(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + unsigned int filter_flags, + unsigned int changed_flags) +{ + might_sleep(); + + trace_drv_config_iface_filter(local, sdata, filter_flags, + changed_flags); + if (local->ops->config_iface_filter) + local->ops->config_iface_filter(&local->hw, &sdata->vif, + filter_flags, + changed_flags); + trace_drv_return_void(local); +} + static inline int drv_set_tim(struct ieee80211_local *local, struct ieee80211_sta *sta, bool set) { @@ -580,25 +524,9 @@ int drv_sta_state(struct ieee80211_local *local, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state); -static inline void drv_sta_rc_update(struct 
ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, u32 changed) -{ - sdata = get_bss_sdata(sdata); - if (!check_sdata_in_driver(sdata)) - return; - - WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED && - (sdata->vif.type != NL80211_IFTYPE_ADHOC && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT)); - - trace_drv_sta_rc_update(local, sdata, sta, changed); - if (local->ops->sta_rc_update) - local->ops->sta_rc_update(&local->hw, &sdata->vif, - sta, changed); - - trace_drv_return_void(local); -} +void drv_sta_rc_update(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, u32 changed); static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -630,76 +558,17 @@ static inline void drv_sta_statistics(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_conf_tx(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, u16 ac, - const struct ieee80211_tx_queue_params *params) -{ - int ret = -EOPNOTSUPP; +int drv_conf_tx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, u16 ac, + const struct ieee80211_tx_queue_params *params); - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return -EIO; - - if (WARN_ONCE(params->cw_min == 0 || - params->cw_min > params->cw_max, - "%s: invalid CW_min/CW_max: %d/%d\n", - sdata->name, params->cw_min, params->cw_max)) - return -EINVAL; - - trace_drv_conf_tx(local, sdata, ac, params); - if (local->ops->conf_tx) - ret = local->ops->conf_tx(&local->hw, &sdata->vif, - ac, params); - trace_drv_return_int(local, ret); - return ret; -} - -static inline u64 drv_get_tsf(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - u64 ret = -1ULL; - - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return ret; - - trace_drv_get_tsf(local, sdata); - if (local->ops->get_tsf) - ret = local->ops->get_tsf(&local->hw, &sdata->vif); - trace_drv_return_u64(local, ret); - return ret; -} - -static inline void drv_set_tsf(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - u64 tsf) -{ - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_set_tsf(local, sdata, tsf); - if (local->ops->set_tsf) - local->ops->set_tsf(&local->hw, &sdata->vif, tsf); - trace_drv_return_void(local); -} - -static inline void drv_reset_tsf(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) -{ - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_reset_tsf(local, sdata); - if (local->ops->reset_tsf) - local->ops->reset_tsf(&local->hw, &sdata->vif); - trace_drv_return_void(local); -} +u64 drv_get_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); +void drv_set_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u64 tsf); +void drv_reset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); static inline int drv_tx_last_beacon(struct ieee80211_local *local) { @@ -714,30 +583,11 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local) return ret; } -static inline int drv_ampdu_action(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - enum ieee80211_ampdu_mlme_action action, - struct ieee80211_sta *sta, u16 tid, - u16 *ssn, u8 buf_size) -{ - int ret = -EOPNOTSUPP; - - might_sleep(); - - sdata = get_bss_sdata(sdata); - if (!check_sdata_in_driver(sdata)) - return -EIO; - - 
trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size); - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action, - sta, tid, ssn, buf_size); - - trace_drv_return_int(local, ret); - - return ret; -} +int drv_ampdu_action(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum ieee80211_ampdu_mlme_action action, + struct ieee80211_sta *sta, u16 tid, + u16 *ssn, u8 buf_size, bool amsdu); static inline int drv_get_survey(struct ieee80211_local *local, int idx, struct survey_info *survey) @@ -993,6 +843,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, { int ret = -EOPNOTSUPP; + might_sleep(); + trace_drv_add_chanctx(local, ctx); if (local->ops->add_chanctx) ret = local->ops->add_chanctx(&local->hw, &ctx->conf); @@ -1006,6 +858,8 @@ static inline int drv_add_chanctx(struct ieee80211_local *local, static inline void drv_remove_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx) { + might_sleep(); + if (WARN_ON(!ctx->driver_present)) return; @@ -1020,6 +874,8 @@ static inline void drv_change_chanctx(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, u32 changed) { + might_sleep(); + trace_drv_change_chanctx(local, ctx, changed); if (local->ops->change_chanctx) { WARN_ON_ONCE(!ctx->driver_present); @@ -1053,6 +909,8 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *ctx) { + might_sleep(); + if (!check_sdata_in_driver(sdata)) return; @@ -1066,64 +924,17 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int -drv_switch_vif_chanctx(struct ieee80211_local *local, - struct ieee80211_vif_chanctx_switch *vifs, - int n_vifs, - enum ieee80211_chanctx_switch_mode mode) -{ - int ret = 0; - int i; - - if (!local->ops->switch_vif_chanctx) - return -EOPNOTSUPP; - - for (i = 0; i < n_vifs; i++) { - struct ieee80211_chanctx *new_ctx = - container_of(vifs[i].new_ctx, - struct ieee80211_chanctx, - conf); - struct ieee80211_chanctx *old_ctx = - container_of(vifs[i].old_ctx, - struct ieee80211_chanctx, - conf); - - WARN_ON_ONCE(!old_ctx->driver_present); - WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS && - new_ctx->driver_present) || - (mode == CHANCTX_SWMODE_REASSIGN_VIF && - !new_ctx->driver_present)); - } - - trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode); - ret = local->ops->switch_vif_chanctx(&local->hw, - vifs, n_vifs, mode); - trace_drv_return_int(local, ret); - - if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) { - for (i = 0; i < n_vifs; i++) { - struct ieee80211_chanctx *new_ctx = - container_of(vifs[i].new_ctx, - struct ieee80211_chanctx, - conf); - struct ieee80211_chanctx *old_ctx = - container_of(vifs[i].old_ctx, - struct ieee80211_chanctx, - conf); - - new_ctx->driver_present = true; - old_ctx->driver_present = false; - } - } - - return ret; -} +int drv_switch_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_vif_chanctx_switch *vifs, + int n_vifs, enum ieee80211_chanctx_switch_mode mode); static inline int drv_start_ap(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { int ret = 0; + might_sleep(); + if (!check_sdata_in_driver(sdata)) return -EIO; diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 188faab11c24..9cc986deda61 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -40,7 +40,7 @@ static const char 
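
With the wrapper moved out of line, the bool amsdu threaded through drv_ampdu_action() above reaches the driver's ampdu_action op as a new trailing argument. A hypothetical driver callback showing the updated signature (only the prototype is dictated by this patch; example_hw_setup_tx_agg() is made up):

#include <net/mac80211.h>

static void example_hw_setup_tx_agg(struct ieee80211_hw *hw, u16 tid,
				    u8 buf_size, bool amsdu); /* hypothetical */

static int example_ampdu_action(struct ieee80211_hw *hw,
				struct ieee80211_vif *vif,
				enum ieee80211_ampdu_mlme_action action,
				struct ieee80211_sta *sta, u16 tid,
				u16 *ssn, u8 buf_size, bool amsdu)
{
	switch (action) {
	case IEEE80211_AMPDU_TX_OPERATIONAL:
		/* amsdu mirrors bit 0 of the peer's ADDBA response */
		example_hw_setup_tx_agg(hw, tid, buf_size, amsdu);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
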
ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { "rx_duplicates", "rx_fragments", "rx_dropped", "tx_packets", "tx_bytes", "tx_filtered", "tx_retry_failed", "tx_retries", - "beacon_loss", "sta_state", "txrate", "rxrate", "signal", + "sta_state", "txrate", "rxrate", "signal", "channel", "noise", "ch_time", "ch_time_busy", "ch_time_ext_busy", "ch_time_rx", "ch_time_tx" }; @@ -77,20 +77,19 @@ static void ieee80211_get_stats(struct net_device *dev, memset(data, 0, sizeof(u64) * STA_STATS_LEN); -#define ADD_STA_STATS(sta) \ - do { \ - data[i++] += sta->rx_packets; \ - data[i++] += sta->rx_bytes; \ - data[i++] += sta->num_duplicates; \ - data[i++] += sta->rx_fragments; \ - data[i++] += sta->rx_dropped; \ - \ - data[i++] += sinfo.tx_packets; \ - data[i++] += sinfo.tx_bytes; \ - data[i++] += sta->tx_filtered_count; \ - data[i++] += sta->tx_retry_failed; \ - data[i++] += sta->tx_retry_count; \ - data[i++] += sta->beacon_loss_count; \ +#define ADD_STA_STATS(sta) \ + do { \ + data[i++] += sta->rx_stats.packets; \ + data[i++] += sta->rx_stats.bytes; \ + data[i++] += sta->rx_stats.num_duplicates; \ + data[i++] += sta->rx_stats.fragments; \ + data[i++] += sta->rx_stats.dropped; \ + \ + data[i++] += sinfo.tx_packets; \ + data[i++] += sinfo.tx_bytes; \ + data[i++] += sta->status_stats.filtered; \ + data[i++] += sta->status_stats.retry_failed; \ + data[i++] += sta->status_stats.retry_count; \ } while (0) /* For Managed stations, find the single station based on BSSID diff --git a/net/mac80211/event.c b/net/mac80211/event.c deleted file mode 100644 index 01ae759518f6..000000000000 --- a/net/mac80211/event.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * mac80211 - events - */ -#include <net/cfg80211.h> -#include "ieee80211_i.h" - -/* - * Indicate a failed Michael MIC to userspace. If the caller knows the TSC of - * the frame that generated the MIC failure (i.e., if it was provided by the - * driver or is still in the frame), it should provide that information. - */ -void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr, const u8 *tsc, - gfp_t gfp) -{ - cfg80211_michael_mic_failure(sdata->dev, hdr->addr2, - (hdr->addr1[0] & 0x01) ? 
- NL80211_KEYTYPE_GROUP : - NL80211_KEYTYPE_PAIRWISE, - keyidx, tsc, gfp); -} diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 7f72bc9bae2e..337bb5d78003 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -188,7 +188,7 @@ ieee80211_ibss_build_presp(struct ieee80211_sub_if_data *sdata, * keep them at 0 */ pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, - chandef, 0); + chandef, 0, false); /* add VHT capability and information IEs */ if (chandef->width != NL80211_CHAN_WIDTH_20 && @@ -229,7 +229,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct cfg80211_chan_def chandef; struct ieee80211_channel *chan; struct beacon_data *presp; - enum nl80211_bss_scan_width scan_width; + struct cfg80211_inform_bss bss_meta = {}; bool have_higher_than_11mbit; bool radar_required; int err; @@ -356,7 +356,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - ieee80211_set_wmm_default(sdata, true); + ieee80211_set_wmm_default(sdata, true, false); sdata->vif.bss_conf.ibss_joined = true; sdata->vif.bss_conf.ibss_creator = creator; @@ -383,10 +383,11 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); - scan_width = cfg80211_chandef_to_scan_width(&chandef); - bss = cfg80211_inform_bss_width_frame(local->hw.wiphy, chan, - scan_width, mgmt, - presp->head_len, 0, GFP_KERNEL); + bss_meta.chan = chan; + bss_meta.scan_width = cfg80211_chandef_to_scan_width(&chandef); + bss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, mgmt, + presp->head_len, GFP_KERNEL); + cfg80211_put_bss(local->hw.wiphy, bss); netif_carrier_on(sdata->dev); cfg80211_ibss_joined(sdata->dev, ifibss->bssid, chan, GFP_KERNEL); @@ -646,7 +647,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, return NULL; } - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; @@ -668,7 +669,8 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sta->sdata == sdata && - time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, + time_after(sta->rx_stats.last_rx + + IEEE80211_IBSS_MERGE_INTERVAL, jiffies)) { active++; break; @@ -1234,7 +1236,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* make sure mandatory rates are always added */ sband = local->hw.wiphy->bands[band]; @@ -1252,7 +1254,7 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; - unsigned long exp_rsn_time = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; + unsigned long exp_rsn = IEEE80211_IBSS_RSN_INACTIVITY_LIMIT; mutex_lock(&local->sta_mtx); @@ -1260,8 +1262,8 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->last_rx + exp_time) || - (time_after(jiffies, sta->last_rx + exp_rsn_time) && + if (time_after(jiffies, sta->rx_stats.last_rx + exp_time) || + (time_after(jiffies, sta->rx_stats.last_rx + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { sta_dbg(sta->sdata, "expiring inactive %sSTA 
%pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6e52659f923f..d832bd59236b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -34,6 +34,8 @@ #include "sta_info.h" #include "debug.h" +extern const struct cfg80211_ops mac80211_config_ops; + struct ieee80211_local; /* Maximum number of broadcast/multicast frames to buffer when some of the @@ -419,6 +421,8 @@ struct ieee80211_sta_tx_tspec { bool downgraded; }; +DECLARE_EWMA(beacon_signal, 16, 4) + struct ieee80211_if_managed { struct timer_list timer; struct timer_list conn_mon_timer; @@ -490,16 +494,7 @@ struct ieee80211_if_managed { s16 p2p_noa_index; - /* Signal strength from the last Beacon frame in the current BSS. */ - int last_beacon_signal; - - /* - * Weighted average of the signal strength from Beacon frames in the - * current BSS. This is in units of 1/16 of the signal unit to maintain - * accuracy and to speed up calculations, i.e., the value need to be - * divided by 16 to get the actual value. - */ - int ave_beacon_signal; + struct ewma_beacon_signal ave_beacon_signal; /* * Number of Beacon frames used in ave_beacon_signal. This can be used @@ -508,6 +503,9 @@ struct ieee80211_if_managed { */ unsigned int count_beacon_signal; + /* Number of times beacon loss was invoked. */ + unsigned int beacon_loss_count; + /* * Last Beacon frame signal strength average (ave_beacon_signal / 16) * that triggered a cqm event. 0 indicates that no event has been @@ -535,6 +533,7 @@ struct ieee80211_if_managed { struct sk_buff *teardown_skb; /* A copy to send through the AP */ spinlock_t teardown_lock; /* To lock changing teardown_skb */ bool tdls_chan_switch_prohibited; + bool tdls_wider_bw_prohibited; /* WMM-AC TSPEC support */ struct ieee80211_sta_tx_tspec tx_tspec[IEEE80211_NUM_ACS]; @@ -1311,7 +1310,6 @@ struct ieee80211_local { struct work_struct dynamic_ps_enable_work; struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; - struct notifier_block network_latency_notifier; struct notifier_block ifa_notifier; struct notifier_block ifa6_notifier; @@ -1497,10 +1495,8 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_disassoc_request *req); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency); +void ieee80211_recalc_ps(struct ieee80211_local *local); void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata); -int ieee80211_max_network_latency(struct notifier_block *nb, - unsigned long data, void *dummy); int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, @@ -1577,7 +1573,7 @@ __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, struct cfg80211_sched_scan_request *req); -int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata); +int ieee80211_request_sched_scan_stop(struct ieee80211_local *local); void ieee80211_sched_scan_end(struct ieee80211_local *local); void ieee80211_sched_scan_stopped_work(struct work_struct *work); @@ -1641,6 +1637,9 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw, struct sk_buff * ieee80211_build_data_template(struct 
ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); +void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_supported_band *sband, + int retry_count, int shift, bool send_to_cooked); void ieee80211_check_fast_xmit(struct sta_info *sta); void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); @@ -1769,11 +1768,8 @@ extern const void *const mac80211_wiphy_privid; /* for wiphy privid */ int ieee80211_frame_duration(enum ieee80211_band band, size_t len, int rate, int erp, int short_preamble, int shift); -void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr, const u8 *tsc, - gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, - bool bss_notify); + bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb); @@ -1853,7 +1849,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - int powersave); + bool powersave); void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, @@ -1966,7 +1962,7 @@ u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap); u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const struct cfg80211_chan_def *chandef, - u16 prot_mode); + u16 prot_mode, bool rifs_mode); u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6964fc6a8ea2..d0dc1bfaeec2 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -661,11 +661,13 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) } /* - * set default queue parameters so drivers don't + * Set default queue parameters so drivers don't * need to initialise the hardware if the hardware - * doesn't start up with sane defaults + * doesn't start up with sane defaults. + * Enable QoS for anything but station interfaces. 
*/ - ieee80211_set_wmm_default(sdata, true); + ieee80211_set_wmm_default(sdata, true, + sdata->vif.type != NL80211_IFTYPE_STATION); } set_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -709,7 +711,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) if (hw_reconf_flags) ieee80211_hw_config(local, hw_reconf_flags); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); if (sdata->vif.type == NL80211_IFTYPE_MONITOR || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { @@ -1016,7 +1018,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, drv_remove_interface(local, sdata); } - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); if (cancel_scan) flush_delayed_work(&local->scan_work); @@ -1204,7 +1206,7 @@ static void ieee80211_iface_work(struct work_struct *work) if (!ieee80211_sdata_running(sdata)) return; - if (local->scanning) + if (test_bit(SCAN_SW_SCANNING, &local->scanning)) return; if (!ieee80211_can_run_worker(local)) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ff79a13d231d..858f6b1cb149 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -20,7 +20,6 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/bitmap.h> -#include <linux/pm_qos.h> #include <linux/inetdevice.h> #include <net/net_namespace.h> #include <net/cfg80211.h> @@ -32,7 +31,6 @@ #include "mesh.h" #include "wep.h" #include "led.h" -#include "cfg.h" #include "debugfs.h" void ieee80211_configure_filter(struct ieee80211_local *local) @@ -283,7 +281,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw) local->in_reconfig = true; barrier(); - schedule_work(&local->restart_work); + queue_work(system_freezable_wq, &local->restart_work); } EXPORT_SYMBOL(ieee80211_restart_hw); @@ -543,7 +541,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_VIF_TXPOWER | NL80211_FEATURE_MAC_ON_CREATE | - NL80211_FEATURE_USERSPACE_MPM; + NL80211_FEATURE_USERSPACE_MPM | + NL80211_FEATURE_FULL_AP_CLIENT_STATE; if (!ops->hw_scan) wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -1082,13 +1081,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) rtnl_unlock(); - local->network_latency_notifier.notifier_call = - ieee80211_max_network_latency; - result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY, - &local->network_latency_notifier); - if (result) - goto fail_pm_qos; - #ifdef CONFIG_INET local->ifa_notifier.notifier_call = ieee80211_ifa_changed; result = register_inetaddr_notifier(&local->ifa_notifier); @@ -1113,10 +1105,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) #endif #if defined(CONFIG_INET) || defined(CONFIG_IPV6) fail_ifa: - pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, - &local->network_latency_notifier); #endif - fail_pm_qos: rtnl_lock(); rate_control_deinitialize(local); ieee80211_remove_interfaces(local); @@ -1142,8 +1131,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) tasklet_kill(&local->tx_pending_tasklet); tasklet_kill(&local->tasklet); - pm_qos_remove_notifier(PM_QOS_NETWORK_LATENCY, - &local->network_latency_notifier); #ifdef CONFIG_INET unregister_inetaddr_notifier(&local->ifa_notifier); #endif diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index e06a5ca7c9a9..fa28500f28fd 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -94,6 +94,9 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, ie->ht_operation, &sta_chan_def); + 
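
The mesh.c changes around this point add VHT support: mesh_matches_local() also derives the peer chandef from the VHT operation IE, and beacon/plink frame sizing grows by the VHT capability and operation elements. Each sizing term follows the usual 2 + sizeof(element) pattern, one byte element ID plus one byte length plus the payload, as in this sketch (the macro and function are illustrative):

#include <linux/ieee80211.h>

/* Tail room one information element needs: ID + length + payload */
#define IE_SPACE(type)	(2 + sizeof(type))

static size_t demo_mesh_vht_tail(void)
{
	return IE_SPACE(struct ieee80211_vht_cap) +	 /* mesh_add_vht_cap_ie() */
	       IE_SPACE(struct ieee80211_vht_operation); /* mesh_add_vht_oper_ie() */
}
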
ieee80211_vht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan, + ie->vht_operation, &sta_chan_def); + if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef, &sta_chan_def)) return false; @@ -436,8 +439,6 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx_conf *chanctx_conf; struct ieee80211_channel *channel; - enum nl80211_channel_type channel_type = - cfg80211_get_chandef_type(&sdata->vif.bss_conf.chandef); struct ieee80211_supported_band *sband; struct ieee80211_sta_ht_cap *ht_cap; u8 *pos; @@ -454,7 +455,10 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[channel->band]; ht_cap = &sband->ht_cap; - if (!ht_cap->ht_supported || channel_type == NL80211_CHAN_NO_HT) + if (!ht_cap->ht_supported || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation)) @@ -462,7 +466,70 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); ieee80211_ie_build_ht_oper(pos, ht_cap, &sdata->vif.bss_conf.chandef, - sdata->vif.bss_conf.ht_operation_mode); + sdata->vif.bss_conf.ht_operation_mode, + false); + + return 0; +} + +int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + enum ieee80211_band band = ieee80211_get_sdata_band(sdata); + struct ieee80211_supported_band *sband; + u8 *pos; + + sband = local->hw.wiphy->bands[band]; + if (!sband->vht_cap.vht_supported || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap)) + return -ENOMEM; + + pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap)); + ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, sband->vht_cap.cap); + + return 0; +} + +int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_channel *channel; + struct ieee80211_supported_band *sband; + struct ieee80211_sta_vht_cap *vht_cap; + u8 *pos; + + rcu_read_lock(); + chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + if (WARN_ON(!chanctx_conf)) { + rcu_read_unlock(); + return -EINVAL; + } + channel = chanctx_conf->def.chan; + rcu_read_unlock(); + + sband = local->hw.wiphy->bands[channel->band]; + vht_cap = &sband->vht_cap; + + if (!vht_cap->vht_supported || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation)) + return -ENOMEM; + + pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation)); + ieee80211_ie_build_vht_oper(pos, vht_cap, + &sdata->vif.bss_conf.chandef); return 0; } @@ -540,9 +607,9 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, * * Return the header length. 
*/ -int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, - struct ieee80211s_hdr *meshhdr, - const char *addr4or5, const char *addr6) +unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6) { if (WARN_ON(!addr4or5 && addr6)) return 0; @@ -637,6 +704,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + ifmsh->mesh_id_len + 2 + sizeof(struct ieee80211_meshconf_ie) + 2 + sizeof(__le16) + /* awake window */ + 2 + sizeof(struct ieee80211_vht_cap) + + 2 + sizeof(struct ieee80211_vht_operation) + ifmsh->ie_len; bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); @@ -718,6 +787,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) mesh_add_meshid_ie(sdata, skb) || mesh_add_meshconf_ie(sdata, skb) || mesh_add_awake_window_ie(sdata, skb) || + mesh_add_vht_cap_ie(sdata, skb) || + mesh_add_vht_oper_ie(sdata, skb) || mesh_add_vendor_ies(sdata, skb)) goto out_free; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 50c8473cf9dc..a1596344c3ba 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -207,9 +207,9 @@ struct mesh_rmc { /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, const u8 *da, const u8 *sa); -int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, - struct ieee80211s_hdr *meshhdr, - const char *addr4or5, const char *addr6); +unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata, + struct ieee80211s_hdr *meshhdr, + const char *addr4or5, const char *addr6); int mesh_rmc_check(struct ieee80211_sub_if_data *sdata, const u8 *addr, struct ieee80211s_hdr *mesh_hdr); bool mesh_matches_local(struct ieee80211_sub_if_data *sdata, @@ -227,6 +227,10 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); +int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d80e0a4c16cf..c6be0b4f4058 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -329,7 +329,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (sta->mesh->fail_avg >= 100) return MAX_METRIC; - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo); + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); rate = cfg80211_calculate_bitrate(&rinfo); if (WARN_ON(!rate)) return MAX_METRIC; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 58384642e03c..bd3d55eb21d4 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -60,7 +60,9 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata, { s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold; return rssi_threshold == 0 || - (sta && (s8) -ewma_signal_read(&sta->avg_signal) > rssi_threshold); + (sta && + (s8)-ewma_signal_read(&sta->rx_stats.avg_signal) > + rssi_threshold); } /** @@ -226,6 +228,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + sizeof(struct ieee80211_meshconf_ie) + 2 + sizeof(struct ieee80211_ht_cap) + 2 + sizeof(struct ieee80211_ht_operation) + + 2 + sizeof(struct ieee80211_vht_cap) + + 
2 + sizeof(struct ieee80211_vht_operation) + 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) @@ -306,7 +310,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (action != WLAN_SP_MESH_PEERING_CLOSE) { if (mesh_add_ht_cap_ie(sdata, skb) || - mesh_add_ht_oper_ie(sdata, skb)) + mesh_add_ht_oper_ie(sdata, skb) || + mesh_add_vht_cap_ie(sdata, skb) || + mesh_add_vht_oper_ie(sdata, skb)) goto free; } @@ -386,7 +392,7 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates); spin_lock_bh(&sta->mesh->plink_lock); - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* rates and capabilities don't change during peering */ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB && @@ -402,6 +408,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, elems->ht_cap_elem, sta)) changed |= IEEE80211_RC_BW_CHANGED; + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + elems->vht_cap_elem, sta); + if (bw != sta->sta.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; @@ -677,6 +686,9 @@ static bool llid_in_use(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + if (!memcmp(&sta->mesh->llid, &llid, sizeof(llid))) { in_use = true; break; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cd7e55e08a23..b140cc6651f4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -20,7 +20,6 @@ #include <linux/etherdevice.h> #include <linux/moduleparam.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos.h> #include <linux/crc32.h> #include <linux/slab.h> #include <linux/export.h> @@ -82,13 +81,6 @@ MODULE_PARM_DESC(probe_wait_ms, " before disconnecting (reason 4)."); /* - * Weight given to the latest Beacon frame when calculating average signal - * strength for Beacon frames received in the current BSS. This must be - * between 1 and 15. - */ -#define IEEE80211_SIGNAL_AVE_WEIGHT 3 - -/* * How many Beacon frames need to have been used in average signal strength * before starting to indicate signal change events. 
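
The one-line llid_in_use() fix above is easy to miss: local->sta_list spans every virtual interface on the hardware, so without the sdata comparison a local link ID in use by a *different* mesh interface was treated as a collision. A reduced model of the corrected scan:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct sta { int iface; uint16_t llid; };

    /* a peer LLID only collides within the same mesh interface */
    static bool llid_in_use(const struct sta *list, int n, int iface, uint16_t llid)
    {
        for (int i = 0; i < n; i++) {
            if (list[i].iface != iface)   /* the check the fix adds */
                continue;
            if (list[i].llid == llid)
                return true;
        }
        return false;
    }

    int main(void)
    {
        struct sta stations[] = { { .iface = 0, .llid = 42 } };

        /* LLID 42 is free on interface 1 even though interface 0 uses it */
        printf("%d\n", llid_in_use(stations, 1, 1, 42));   /* 0 */
        return 0;
    }
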
*/ @@ -943,7 +935,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - int powersave) + bool powersave) { struct sk_buff *skb; struct ieee80211_hdr_3addr *nullfunc; @@ -1427,7 +1419,7 @@ static void ieee80211_enable_ps(struct ieee80211_local *local, msecs_to_jiffies(conf->dynamic_ps_timeout)); } else { if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) - ieee80211_send_nullfunc(local, sdata, 1); + ieee80211_send_nullfunc(local, sdata, true); if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) @@ -1483,7 +1475,7 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) } /* need to hold RTNL or interface lock */ -void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) +void ieee80211_recalc_ps(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata, *found = NULL; int count = 0; @@ -1512,48 +1504,23 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) } if (count == 1 && ieee80211_powersave_allowed(found)) { + u8 dtimper = found->u.mgd.dtim_period; s32 beaconint_us; - if (latency < 0) - latency = pm_qos_request(PM_QOS_NETWORK_LATENCY); - beaconint_us = ieee80211_tu_to_usec( found->vif.bss_conf.beacon_int); timeout = local->dynamic_ps_forced_timeout; - if (timeout < 0) { - /* - * Go to full PSM if the user configures a very low - * latency requirement. - * The 2000 second value is there for compatibility - * until the PM_QOS_NETWORK_LATENCY is configured - * with real values. - */ - if (latency > (1900 * USEC_PER_MSEC) && - latency != (2000 * USEC_PER_SEC)) - timeout = 0; - else - timeout = 100; - } + if (timeout < 0) + timeout = 100; local->hw.conf.dynamic_ps_timeout = timeout; - if (beaconint_us > latency) { - local->ps_sdata = NULL; - } else { - int maxslp = 1; - u8 dtimper = found->u.mgd.dtim_period; - - /* If the TIM IE is invalid, pretend the value is 1 */ - if (!dtimper) - dtimper = 1; - else if (dtimper > 1) - maxslp = min_t(int, dtimper, - latency / beaconint_us); - - local->hw.conf.max_sleep_period = maxslp; - local->hw.conf.ps_dtim_period = dtimper; - local->ps_sdata = found; - } + /* If the TIM IE is invalid, pretend the value is 1 */ + if (!dtimper) + dtimper = 1; + + local->hw.conf.ps_dtim_period = dtimper; + local->ps_sdata = found; } else { local->ps_sdata = NULL; } @@ -1642,7 +1609,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) msecs_to_jiffies( local->hw.conf.dynamic_ps_timeout)); } else { - ieee80211_send_nullfunc(local, sdata, 1); + ieee80211_send_nullfunc(local, sdata, true); /* Flush to get the tx status of nullfunc frame */ ieee80211_flush_queues(local, sdata, false); } @@ -1777,10 +1744,10 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, const u8 *wmm_param, size_t wmm_param_len) { - struct ieee80211_tx_queue_params params; + struct ieee80211_tx_queue_params params[IEEE80211_NUM_ACS]; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; size_t left; - int count; + int count, ac; const u8 *pos; u8 uapsd_queues = 0; @@ -1814,25 +1781,24 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, int aci = (pos[0] >> 5) & 0x03; int acm = (pos[0] >> 4) & 0x01; bool uapsd = false; - int queue; switch (aci) { case 1: /* AC_BK */ - queue = 3; + ac = IEEE80211_AC_BK; if (acm) sdata->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ if (uapsd_queues & 
IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) uapsd = true; break; case 2: /* AC_VI */ - queue = 1; + ac = IEEE80211_AC_VI; if (acm) sdata->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) uapsd = true; break; case 3: /* AC_VO */ - queue = 0; + ac = IEEE80211_AC_VO; if (acm) sdata->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) @@ -1840,7 +1806,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, break; case 0: /* AC_BE */ default: - queue = 2; + ac = IEEE80211_AC_BE; if (acm) sdata->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) @@ -1848,25 +1814,41 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, break; } - params.aifs = pos[0] & 0x0f; - params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); - params.cw_min = ecw2cw(pos[1] & 0x0f); - params.txop = get_unaligned_le16(pos + 2); - params.acm = acm; - params.uapsd = uapsd; + params[ac].aifs = pos[0] & 0x0f; + + if (params[ac].aifs < 2) { + sdata_info(sdata, + "AP has invalid WMM params (AIFSN=%d for ACI %d), will use 2\n", + params[ac].aifs, aci); + params[ac].aifs = 2; + } + params[ac].cw_max = ecw2cw((pos[1] & 0xf0) >> 4); + params[ac].cw_min = ecw2cw(pos[1] & 0x0f); + params[ac].txop = get_unaligned_le16(pos + 2); + params[ac].acm = acm; + params[ac].uapsd = uapsd; + if (params[ac].cw_min > params[ac].cw_max) { + sdata_info(sdata, + "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", + params[ac].cw_min, params[ac].cw_max, aci); + return false; + } + } + + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { mlme_dbg(sdata, - "WMM queue=%d aci=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n", - queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, - params.txop, params.uapsd, - ifmgd->tx_tspec[queue].downgraded); - sdata->tx_conf[queue] = params; - if (!ifmgd->tx_tspec[queue].downgraded && - drv_conf_tx(local, sdata, queue, ¶ms)) + "WMM AC=%d acm=%d aifs=%d cWmin=%d cWmax=%d txop=%d uapsd=%d, downgraded=%d\n", + ac, params[ac].acm, + params[ac].aifs, params[ac].cw_min, params[ac].cw_max, + params[ac].txop, params[ac].uapsd, + ifmgd->tx_tspec[ac].downgraded); + sdata->tx_conf[ac] = params[ac]; + if (!ifmgd->tx_tspec[ac].downgraded && + drv_conf_tx(local, sdata, ac, ¶ms[ac])) sdata_err(sdata, - "failed to set TX queue parameters for queue %d\n", - queue); + "failed to set TX queue parameters for AC %d\n", + ac); } /* enable WMM or activate new settings */ @@ -2004,7 +1986,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, bss_info_changed); mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); mutex_unlock(&local->iflist_mtx); ieee80211_recalc_smps(sdata); @@ -2110,7 +2092,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); /* disassociated - set to defaults now */ - ieee80211_set_wmm_default(sdata, false); + ieee80211_set_wmm_default(sdata, false, false); del_timer_sync(&sdata->u.mgd.conn_mon_timer); del_timer_sync(&sdata->u.mgd.bcn_mon_timer); @@ -2172,7 +2154,7 @@ static void ieee80211_reset_ap_probe(struct ieee80211_sub_if_data *sdata) __ieee80211_stop_poll(sdata); mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); mutex_unlock(&local->iflist_mtx); if (ieee80211_hw_check(&sdata->local->hw, 
CONNECTION_MONITOR)) @@ -2275,7 +2257,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; - ieee80211_send_nullfunc(sdata->local, sdata, 0); + ieee80211_send_nullfunc(sdata->local, sdata, false); } else { int ssid_len; @@ -2348,7 +2330,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, goto out; mutex_lock(&sdata->local->iflist_mtx); - ieee80211_recalc_ps(sdata->local, -1); + ieee80211_recalc_ps(sdata->local); mutex_unlock(&sdata->local->iflist_mtx); ifmgd->probe_send_count = 0; @@ -2453,15 +2435,9 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.beacon_connection_loss_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - struct sta_info *sta; - if (ifmgd->associated) { - rcu_read_lock(); - sta = sta_info_get(sdata, ifmgd->bssid); - if (sta) - sta->beacon_loss_count++; - rcu_read_unlock(); - } + if (ifmgd->associated) + ifmgd->beacon_loss_count++; if (ifmgd->connection_loss) { sdata_info(sdata, "Connection to AP %pM lost\n", @@ -3051,8 +3027,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); - if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) + if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) { set_sta_flag(sta, WLAN_STA_MFP); + sta->sta.mfp = true; + } else { + sta->sta.mfp = false; + } sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS; @@ -3079,11 +3059,21 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, */ ifmgd->wmm_last_param_set = -1; - if (!(ifmgd->flags & IEEE80211_STA_DISABLE_WMM) && elems.wmm_param) - ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, - elems.wmm_param_len); - else - ieee80211_set_wmm_default(sdata, false); + if (ifmgd->flags & IEEE80211_STA_DISABLE_WMM) { + ieee80211_set_wmm_default(sdata, false, false); + } else if (!ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, + elems.wmm_param_len)) { + /* still enable QoS since we might have HT/VHT */ + ieee80211_set_wmm_default(sdata, false, true); + /* set the disable-WMM flag in this case to disable + * tracking WMM parameter changes in the beacon if + * the parameters weren't actually valid. Doing so + * avoids changing parameters very strangely when + * the AP is going back and forth between valid and + * invalid parameters. 
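
In the WMM parsing above, the CW fields travel as exponents: ecw2cw(), a small mac80211 helper, expands ECW to the contention window, CW = 2^ECW - 1. With the byte layout used there (ECWmax in the high nibble, ECWmin in the low), 0xa4 decodes to CWmin 15 and CWmax 1023:

    #include <stdio.h>

    /* mac80211's ecw2cw(): contention window from its exponent encoding */
    static unsigned int ecw2cw(int ecw)
    {
        return (1U << ecw) - 1;
    }

    int main(void)
    {
        unsigned char b = 0xa4;   /* ECWmax=10 (high nibble), ECWmin=4 (low) */

        printf("cw_min=%u cw_max=%u\n",
               ecw2cw(b & 0x0f), ecw2cw((b & 0xf0) >> 4));   /* 15 1023 */
        return 0;
    }
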
+ */ + ifmgd->flags |= IEEE80211_STA_DISABLE_WMM; + } changed |= BSS_CHANGED_QOS; /* set AID and assoc capability, @@ -3262,16 +3252,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, if (ifmgd->associated && ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) ieee80211_reset_ap_probe(sdata); - - if (ifmgd->auth_data && !ifmgd->auth_data->bss->proberesp_ies && - ether_addr_equal(mgmt->bssid, ifmgd->auth_data->bss->bssid)) { - /* got probe response, continue with auth */ - sdata_info(sdata, "direct probe responded\n"); - ifmgd->auth_data->tries = 0; - ifmgd->auth_data->timeout = jiffies; - ifmgd->auth_data->timeout_started = true; - run_again(sdata, ifmgd->auth_data->timeout); - } } /* @@ -3374,24 +3354,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ - ifmgd->last_beacon_signal = rx_status->signal; if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; - ifmgd->ave_beacon_signal = rx_status->signal * 16; + ewma_beacon_signal_init(&ifmgd->ave_beacon_signal); ifmgd->last_cqm_event_signal = 0; ifmgd->count_beacon_signal = 1; ifmgd->last_ave_beacon_signal = 0; } else { - ifmgd->ave_beacon_signal = - (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + - (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * - ifmgd->ave_beacon_signal) / 16; ifmgd->count_beacon_signal++; } + ewma_beacon_signal_add(&ifmgd->ave_beacon_signal, -rx_status->signal); + if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold && ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { - int sig = ifmgd->ave_beacon_signal; + int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal); int last_sig = ifmgd->last_ave_beacon_signal; struct ieee80211_event event = { .type = RSSI_EVENT, @@ -3418,10 +3395,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (bss_conf->cqm_rssi_thold && ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT && !(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) { - int sig = ifmgd->ave_beacon_signal / 16; + int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal); int last_event = ifmgd->last_cqm_event_signal; int thold = bss_conf->cqm_rssi_thold; int hyst = bss_conf->cqm_rssi_hyst; + if (sig < thold && (last_event == 0 || sig < last_event - hyst)) { ifmgd->last_cqm_event_signal = sig; @@ -3456,31 +3434,27 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, len - baselen, false, &elems, care_about_ies, ncrc); - if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) { - bool directed_tim = ieee80211_check_tim(elems.tim, - elems.tim_len, - ifmgd->aid); - if (directed_tim) { - if (local->hw.conf.dynamic_ps_timeout > 0) { - if (local->hw.conf.flags & IEEE80211_CONF_PS) { - local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_PS); - } - ieee80211_send_nullfunc(local, sdata, 0); - } else if (!local->pspolling && sdata->u.mgd.powersave) { - local->pspolling = true; - - /* - * Here is assumed that the driver will be - * able to send ps-poll frame and receive a - * response even though power save mode is - * enabled, but some drivers might require - * to disable power save here. This needs - * to be investigated. 
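
The beacon-RSSI bookkeeping above drops the hand-rolled "(3*new + 13*old)/16" average for a linux/average.h EWMA. The generated helpers keep an unsigned fixed-point accumulator, which is why dBm values are negated on ewma_beacon_signal_add() and negated back after read. A userspace rendering of the arithmetic, using the factor/weight of 1024/8 from the DECLARE_EWMA(signal, ...) visible later in this diff (the beacon declaration may use different constants):

    #include <stdio.h>

    #define FACTOR_SHIFT 10   /* fixed-point factor 1024 */
    #define WEIGHT_SHIFT 3    /* averaging weight 8 */

    struct ewma { unsigned long internal; };

    static void ewma_add(struct ewma *e, unsigned long val)
    {
        unsigned long i = e->internal;

        /* weighted update: 7/8 old + 1/8 new, carried in fixed point */
        e->internal = i ? (((i << WEIGHT_SHIFT) - i) + (val << FACTOR_SHIFT))
                              >> WEIGHT_SHIFT
                        : (val << FACTOR_SHIFT);
    }

    static unsigned long ewma_read(const struct ewma *e)
    {
        return e->internal >> FACTOR_SHIFT;
    }

    int main(void)
    {
        struct ewma avg = { 0 };
        int signals_dbm[] = { -45, -50, -60 };

        for (int k = 0; k < 3; k++)
            ewma_add(&avg, -signals_dbm[k]);   /* store negated: unsigned only */
        printf("avg signal: %ld dBm\n", -(long)ewma_read(&avg));   /* -47 */
        return 0;
    }
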
- */ - ieee80211_send_pspoll(local, sdata); + if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) && + ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) { + if (local->hw.conf.dynamic_ps_timeout > 0) { + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); } + ieee80211_send_nullfunc(local, sdata, false); + } else if (!local->pspolling && sdata->u.mgd.powersave) { + local->pspolling = true; + + /* + * Here is assumed that the driver will be + * able to send ps-poll frame and receive a + * response even though power save mode is + * enabled, but some drivers might require + * to disable power save here. This needs + * to be investigated. + */ + ieee80211_send_pspoll(local, sdata); } } @@ -3567,7 +3541,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ifmgd->have_beacon = true; mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); mutex_unlock(&local->iflist_mtx); ieee80211_recalc_ps_vif(sdata); @@ -3717,12 +3691,14 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, reason); } -static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) +static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; u32 tx_flags = 0; + u16 trans = 1; + u16 status = 0; sdata_assert_lock(sdata); @@ -3746,54 +3722,27 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) drv_mgd_prepare_tx(local, sdata); - if (auth_data->bss->proberesp_ies) { - u16 trans = 1; - u16 status = 0; - - sdata_info(sdata, "send auth to %pM (try %d/%d)\n", - auth_data->bss->bssid, auth_data->tries, - IEEE80211_AUTH_MAX_TRIES); - - auth_data->expected_transaction = 2; + sdata_info(sdata, "send auth to %pM (try %d/%d)\n", + auth_data->bss->bssid, auth_data->tries, + IEEE80211_AUTH_MAX_TRIES); - if (auth_data->algorithm == WLAN_AUTH_SAE) { - trans = auth_data->sae_trans; - status = auth_data->sae_status; - auth_data->expected_transaction = trans; - } + auth_data->expected_transaction = 2; - if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) - tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | - IEEE80211_TX_INTFL_MLME_CONN_TX; - - ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, - auth_data->data, auth_data->data_len, - auth_data->bss->bssid, - auth_data->bss->bssid, NULL, 0, 0, - tx_flags); - } else { - const u8 *ssidie; + if (auth_data->algorithm == WLAN_AUTH_SAE) { + trans = auth_data->sae_trans; + status = auth_data->sae_status; + auth_data->expected_transaction = trans; + } - sdata_info(sdata, "direct probe to %pM (try %d/%i)\n", - auth_data->bss->bssid, auth_data->tries, - IEEE80211_AUTH_MAX_TRIES); + if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) + tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS | + IEEE80211_TX_INTFL_MLME_CONN_TX; - rcu_read_lock(); - ssidie = ieee80211_bss_get_ie(auth_data->bss, WLAN_EID_SSID); - if (!ssidie) { - rcu_read_unlock(); - return -EINVAL; - } - /* - * Direct probe is sent to broadcast address as some APs - * will not answer to direct packet in unassociated state. 
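
Both before and after the restructuring above, the branch is taken only when ieee80211_check_tim() finds the station's AID bit set in the TIM element's partial virtual bitmap. A simplified decoder of that lookup (the in-kernel helper in linux/ieee80211.h also copes with the DTIM fields preceding the bitmap control octet):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool check_tim(const uint8_t *virt_bitmap, size_t len,
                          uint8_t bitmap_ctrl, uint16_t aid)
    {
        uint16_t n1 = bitmap_ctrl & 0xfe;   /* byte offset of partial bitmap */
        uint16_t index = aid / 8;

        if (index < n1 || index >= n1 + len)
            return false;                   /* AID outside advertised window */
        return virt_bitmap[index - n1] & (1 << (aid % 8));
    }

    int main(void)
    {
        uint8_t bitmap[] = { 0x00, 0x04 };  /* bit for AID 10 in second octet */

        printf("%d\n", check_tim(bitmap, sizeof(bitmap), 0x00, 10));  /* 1 */
        return 0;
    }
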
- */ - ieee80211_send_probe_req(sdata, sdata->vif.addr, NULL, - ssidie + 2, ssidie[1], - NULL, 0, (u32) -1, true, 0, - auth_data->bss->channel, false); - rcu_read_unlock(); - } + ieee80211_send_auth(sdata, trans, auth_data->algorithm, status, + auth_data->data, auth_data->data_len, + auth_data->bss->bssid, + auth_data->bss->bssid, NULL, 0, 0, + tx_flags); if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; @@ -3874,8 +3823,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) bool status_acked = ifmgd->status_acked; ifmgd->status_received = false; - if (ifmgd->auth_data && - (ieee80211_is_probe_req(fc) || ieee80211_is_auth(fc))) { + if (ifmgd->auth_data && ieee80211_is_auth(fc)) { if (status_acked) { ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; @@ -3906,7 +3854,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) * so let's just kill the auth data */ ieee80211_destroy_auth_data(sdata, false); - } else if (ieee80211_probe_auth(sdata)) { + } else if (ieee80211_auth(sdata)) { u8 bssid[ETH_ALEN]; struct ieee80211_event event = { .type = MLME_EVENT, @@ -4197,21 +4145,6 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) rcu_read_unlock(); } -int ieee80211_max_network_latency(struct notifier_block *nb, - unsigned long data, void *dummy) -{ - s32 latency_usec = (s32) data; - struct ieee80211_local *local = - container_of(nb, struct ieee80211_local, - network_latency_notifier); - - mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, latency_usec); - mutex_unlock(&local->iflist_mtx); - - return NOTIFY_OK; -} - static u8 ieee80211_ht_vht_rx_chains(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss) { @@ -4613,7 +4546,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (err) goto err_clear; - err = ieee80211_probe_auth(sdata); + err = ieee80211_auth(sdata); if (err) { sta_info_destroy_addr(sdata, req->bss->bssid); goto err_clear; @@ -4635,44 +4568,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, return err; } -static bool ieee80211_usable_wmm_params(struct ieee80211_sub_if_data *sdata, - const u8 *wmm_param, int len) -{ - const u8 *pos; - size_t left; - - if (len < 8) - return false; - - if (wmm_param[5] != 1 /* version */) - return false; - - pos = wmm_param + 8; - left = len - 8; - - for (; left >= 4; left -= 4, pos += 4) { - u8 aifsn = pos[0] & 0x0f; - u8 ecwmin = pos[1] & 0x0f; - u8 ecwmax = (pos[1] & 0xf0) >> 4; - int aci = (pos[0] >> 5) & 0x03; - - if (aifsn < 2) { - sdata_info(sdata, - "AP has invalid WMM params (AIFSN=%d for ACI %d), disabling WMM\n", - aifsn, aci); - return false; - } - if (ecwmin > ecwmax) { - sdata_info(sdata, - "AP has invalid WMM params (ECWmin/max=%d/%d for ACI %d), disabling WMM\n", - ecwmin, ecwmax, aci); - return false; - } - } - - return true; -} - int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req) { @@ -4737,39 +4632,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->wmm = bss->wmm_used && (local->hw.queues >= IEEE80211_NUM_ACS); - if (assoc_data->wmm) { - /* try to check validity of WMM params IE */ - const struct cfg80211_bss_ies *ies; - const u8 *wp, *start, *end; - - rcu_read_lock(); - ies = rcu_dereference(req->bss->ies); - start = ies->data; - end = start + ies->len; - - while (true) { - wp = cfg80211_find_vendor_ie( - WLAN_OUI_MICROSOFT, - WLAN_OUI_TYPE_MICROSOFT_WMM, - start, end - start); - if (!wp) - break; - start = wp + wp[1] + 2; - /* 
if this IE is too short, try the next */ - if (wp[1] <= 4) - continue; - /* if this IE is WMM params, we found what we wanted */ - if (wp[6] == 1) - break; - } - - if (!wp || !ieee80211_usable_wmm_params(sdata, wp + 2, - wp[1] - 2)) { - assoc_data->wmm = false; - ifmgd->flags |= IEEE80211_STA_DISABLE_WMM; - } - rcu_read_unlock(); - } /* * IEEE802.11n does not allow TKIP/WEP as pairwise ciphers in HT mode. @@ -5028,6 +4890,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } + if (ifmgd->assoc_data && + ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) { + sdata_info(sdata, + "aborting association with %pM by local choice (Reason: %u=%s)\n", + req->bssid, req->reason_code, + ieee80211_get_reason_code_string(req->reason_code)); + + drv_mgd_prepare_tx(sdata->local, sdata); + ieee80211_send_deauth_disassoc(sdata, req->bssid, + IEEE80211_STYPE_DEAUTH, + req->reason_code, tx, + frame_buf); + ieee80211_destroy_assoc_data(sdata, false); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + req->reason_code); + return 0; + } + if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { sdata_info(sdata, diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index 573b81a1fb2d..0be0aadfc559 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -75,7 +75,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return; - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; /* Add only mandatory rates for now */ sband = local->hw.wiphy->bands[band]; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index f2c75cf491fc..04401037140e 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -57,7 +57,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) * to send a new nullfunc frame to inform the AP that we * are again sleeping. */ - ieee80211_send_nullfunc(local, sdata, 1); + ieee80211_send_nullfunc(local, sdata, true); } /* inform AP that we are awake again, unless power save is enabled */ @@ -66,7 +66,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; if (!local->ps_sdata) - ieee80211_send_nullfunc(local, sdata, 0); + ieee80211_send_nullfunc(local, sdata, false); else if (local->offchannel_ps_enabled) { /* * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware @@ -93,7 +93,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata) * restart the timer now and send a nullfunc frame to inform * the AP that we are awake. 
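
ieee80211_send_nullfunc()'s last parameter becoming bool matches what it actually controls: whether the Power Management bit is set in the frame control of the transmitted nullfunc, i.e. whether the STA tells its AP it is entering (true) or leaving (false) power save. Self-contained frame-control arithmetic; the constants are the IEEE 802.11 values from linux/ieee80211.h:

    #include <stdint.h>
    #include <stdio.h>

    #define IEEE80211_FTYPE_DATA      0x0008
    #define IEEE80211_STYPE_NULLFUNC  0x0040
    #define IEEE80211_FCTL_PM         0x1000   /* power-management bit */

    int main(void)
    {
        uint16_t fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC;
        int powersave = 1;                     /* the former "int 1/0" argument */

        if (powersave)
            fc |= IEEE80211_FCTL_PM;           /* "I am going to sleep" */
        printf("frame_control=0x%04x\n", fc);  /* 0x1048 */
        return 0;
    }
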
*/ - ieee80211_send_nullfunc(local, sdata, 0); + ieee80211_send_nullfunc(local, sdata, false); mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index b676b9fa707b..00a43a70e1fc 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -6,6 +6,13 @@ #include "driver-ops.h" #include "led.h" +static void ieee80211_sched_scan_cancel(struct ieee80211_local *local) +{ + if (ieee80211_request_sched_scan_stop(local)) + return; + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); +} + int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) { struct ieee80211_local *local = hw_to_local(hw); @@ -23,7 +30,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) ieee80211_del_virtual_monitor(local); - if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) { + if (ieee80211_hw_check(hw, AMPDU_AGGREGATION) && + !(wowlan && wowlan->any)) { mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { set_sta_flag(sta, WLAN_STA_BLOCK_BA); @@ -33,6 +41,10 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) mutex_unlock(&local->sta_mtx); } + /* keep sched_scan only in case of 'any' trigger */ + if (!(wowlan && wowlan->any)) + ieee80211_sched_scan_cancel(local); + ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_SUSPEND, diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 9ce8883d5f44..a4e2f4e67f94 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -305,7 +305,10 @@ static void __rate_control_send_low(struct ieee80211_hw *hw, info->control.rates[0].idx = i; break; } - WARN_ON_ONCE(i == sband->n_bitrates); + WARN_ONCE(i == sband->n_bitrates, + "no supported rates (0x%x) in rate_mask 0x%x with flags 0x%x\n", + sta ? sta->supp_rates[sband->band] : -1, + rate_mask, rate_flags); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 
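
The rate.c change just above upgrades a bare WARN_ON_ONCE to WARN_ONCE so the single backtrace also records which masks excluded every rate. Like WARN_ON_ONCE, WARN_ONCE() evaluates to its condition, so it can gate a branch directly; a kernel-style fragment, shape only, not the patched function:

    /* WARN_ONCE(cond, fmt, ...) from <linux/bug.h>: prints fmt plus a
     * backtrace the first time cond is true, and returns cond every time. */
    if (WARN_ONCE(i == n_bitrates,
                  "no supported rates in rate_mask 0x%x with flags 0x%x\n",
                  rate_mask, rate_flags))
        i = 0;   /* illustrative fallback; the real code keeps going */
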
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index 1db5f7c3318a..820b0abc9c0d 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -85,12 +85,10 @@ minstrel_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; p += sprintf(p, "\n"); - p += sprintf(p, "best __________rate_________ ______" - "statistics______ ________last_______ " - "______sum-of________\n"); - p += sprintf(p, "rate [name idx airtime max_tp] [ ø(tp) ø(prob) " - "sd(prob)] [prob.|retry|suc|att] " - "[#success | #attempts]\n"); + p += sprintf(p, + "best __________rate_________ ________statistics________ ________last_______ ______sum-of________\n"); + p += sprintf(p, + "rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [prob.|retry|suc|att] [#success | #attempts]\n"); for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; @@ -112,7 +110,7 @@ minstrel_stats_open(struct inode *inode, struct file *file) prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); - p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" + p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" " %3u.%1u %3u %3u %-3u " "%9llu %-9llu\n", tp_max / 10, tp_max % 10, diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 6822ce0f95e5..5320e35ed3d0 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -86,7 +86,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); - p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" + p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" " %3u.%1u %3u %3u %-3u " "%9llu %-9llu\n", tp_max / 10, tp_max % 10, @@ -129,12 +129,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) p = ms->buf; p += sprintf(p, "\n"); - p += sprintf(p, " best ____________rate__________ " - "______statistics______ ________last_______ " - "______sum-of________\n"); - p += sprintf(p, "mode guard # rate [name idx airtime max_tp] " - "[ ø(tp) ø(prob) sd(prob)] [prob.|retry|suc|att] [#success | " - "#attempts]\n"); + p += sprintf(p, + " best ____________rate__________ ________statistics________ ________last_______ ______sum-of________\n"); + p += sprintf(p, + "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [prob.|retry|suc|att] [#success | #attempts]\n"); p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p); for (i = 0; i < MINSTREL_CCK_GROUP; i++) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5bc0b88d9eb1..8bae5de0dc44 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1113,16 +1113,16 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) is_multicast_ether_addr(hdr->addr1)) return RX_CONTINUE; - if (rx->sta) { - if (unlikely(ieee80211_has_retry(hdr->frame_control) && - rx->sta->last_seq_ctrl[rx->seqno_idx] == - hdr->seq_ctrl)) { - I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); - rx->sta->num_duplicates++; - return RX_DROP_UNUSABLE; - } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { - rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; - } + if (!rx->sta) + return RX_CONTINUE; + + if (unlikely(ieee80211_has_retry(hdr->frame_control) && + rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { + I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); + rx->sta->rx_stats.num_duplicates++; + 
return RX_DROP_UNUSABLE; + } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { + rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; } return RX_CONTINUE; @@ -1396,51 +1396,56 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) NL80211_IFTYPE_ADHOC); if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { - sta->last_rx_rate_idx = status->rate_idx; - sta->last_rx_rate_flag = status->flag; - sta->last_rx_rate_vht_flag = status->vht_flag; - sta->last_rx_rate_vht_nss = status->vht_nss; + sta->rx_stats.last_rate_idx = + status->rate_idx; + sta->rx_stats.last_rate_flag = + status->flag; + sta->rx_stats.last_rate_vht_flag = + status->vht_flag; + sta->rx_stats.last_rate_vht_nss = + status->vht_nss; } } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. */ - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control)) { - sta->last_rx_rate_idx = status->rate_idx; - sta->last_rx_rate_flag = status->flag; - sta->last_rx_rate_vht_flag = status->vht_flag; - sta->last_rx_rate_vht_nss = status->vht_nss; + sta->rx_stats.last_rate_idx = status->rate_idx; + sta->rx_stats.last_rate_flag = status->flag; + sta->rx_stats.last_rate_vht_flag = status->vht_flag; + sta->rx_stats.last_rate_vht_nss = status->vht_nss; } } if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) ieee80211_sta_rx_notify(rx->sdata, hdr); - sta->rx_fragments++; - sta->rx_bytes += rx->skb->len; + sta->rx_stats.fragments++; + sta->rx_stats.bytes += rx->skb->len; if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { - sta->last_signal = status->signal; - ewma_signal_add(&sta->avg_signal, -status->signal); + sta->rx_stats.last_signal = status->signal; + ewma_signal_add(&sta->rx_stats.avg_signal, -status->signal); } if (status->chains) { - sta->chains = status->chains; + sta->rx_stats.chains = status->chains; for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { int signal = status->chain_signal[i]; if (!(status->chains & BIT(i))) continue; - sta->chain_signal_last[i] = signal; - ewma_signal_add(&sta->chain_signal_avg[i], -signal); + sta->rx_stats.chain_signal_last[i] = signal; + ewma_signal_add(&sta->rx_stats.chain_signal_avg[i], + -signal); } } @@ -1500,7 +1505,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Update counter and free packet here to avoid * counting this as a dropped packed. */ - sta->rx_packets++; + sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -1922,7 +1927,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) ieee80211_led_rx(rx->local); out_no_led: if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_CONTINUE; } @@ -2376,7 +2381,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) * for non-QoS-data frames. Here we know it's a data * frame, so count MSDUs. 
*/ - rx->sta->rx_msdu[rx->seqno_idx]++; + rx->sta->rx_stats.msdu[rx->seqno_idx]++; } /* @@ -2413,7 +2418,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx) skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb); schedule_work(&local->tdls_chsw_work); if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_QUEUED; } @@ -2875,7 +2880,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -2884,7 +2889,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&local->hw, &sdata->work); if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_QUEUED; } @@ -2911,7 +2916,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig, rx->skb->data, rx->skb->len, 0)) { if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3030,7 +3035,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) skb_queue_tail(&sdata->skb_queue, rx->skb); ieee80211_queue_work(&rx->local->hw, &sdata->work); if (rx->sta) - rx->sta->rx_packets++; + rx->sta->rx_stats.packets++; return RX_QUEUED; } @@ -3112,7 +3117,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_MONITOR: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_dropped++; + rx->sta->rx_stats.dropped++; /* fall through */ case RX_CONTINUE: { struct ieee80211_rate *rate = NULL; @@ -3132,7 +3137,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_UNUSABLE: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->rx_dropped++; + rx->sta->rx_stats.dropped++; dev_kfree_skb(rx->skb); break; case RX_QUEUED: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 11d0901ebb7b..4aeca4b0c3cb 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -16,7 +16,6 @@ #include <linux/if_arp.h> #include <linux/etherdevice.h> #include <linux/rtnetlink.h> -#include <linux/pm_qos.h> #include <net/sch_generic.h> #include <linux/slab.h> #include <linux/export.h> @@ -67,24 +66,23 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct cfg80211_bss *cbss; struct ieee80211_bss *bss; int clen, srlen; - enum nl80211_bss_scan_width scan_width; - s32 signal = 0; + struct cfg80211_inform_bss bss_meta = {}; bool signal_valid; if (ieee80211_hw_check(&local->hw, SIGNAL_DBM)) - signal = rx_status->signal * 100; + bss_meta.signal = rx_status->signal * 100; else if (ieee80211_hw_check(&local->hw, SIGNAL_UNSPEC)) - signal = (rx_status->signal * 100) / local->hw.max_signal; + bss_meta.signal = (rx_status->signal * 100) / local->hw.max_signal; - scan_width = NL80211_BSS_CHAN_WIDTH_20; + bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_20; if (rx_status->flag & RX_FLAG_5MHZ) - scan_width = NL80211_BSS_CHAN_WIDTH_5; + bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_5; if (rx_status->flag & RX_FLAG_10MHZ) - scan_width = NL80211_BSS_CHAN_WIDTH_10; + bss_meta.scan_width = NL80211_BSS_CHAN_WIDTH_10; - cbss = cfg80211_inform_bss_width_frame(local->hw.wiphy, channel, - scan_width, mgmt, len, signal, - GFP_ATOMIC); + bss_meta.chan = channel; + cbss = cfg80211_inform_bss_frame_data(local->hw.wiphy, &bss_meta, + mgmt, len, GFP_ATOMIC); if (!cbss) return NULL; /* In case the signal is invalid update the status */ @@ -1142,10 
+1140,10 @@ int ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata, return ret; } -int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) +int ieee80211_request_sched_scan_stop(struct ieee80211_local *local) { - struct ieee80211_local *local = sdata->local; - int ret = 0; + struct ieee80211_sub_if_data *sched_scan_sdata; + int ret = -ENOENT; mutex_lock(&local->mtx); @@ -1157,8 +1155,10 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_sub_if_data *sdata) /* We don't want to restart sched scan anymore. */ RCU_INIT_POINTER(local->sched_scan_req, NULL); - if (rcu_access_pointer(local->sched_scan_sdata)) { - ret = drv_sched_scan_stop(local, sdata); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata) { + ret = drv_sched_scan_stop(local, sched_scan_sdata); if (!ret) RCU_INIT_POINTER(local->sched_scan_sdata, NULL); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 64f1936350c6..f91d1873218c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -303,7 +303,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; - struct timespec uptime; int i; sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); @@ -332,18 +331,17 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; sta->sta_state = IEEE80211_STA_NONE; /* Mark TID as unreserved */ sta->reserved_tid = IEEE80211_TID_UNRESERVED; - ktime_get_ts(&uptime); - sta->last_connected = uptime.tv_sec; - ewma_signal_init(&sta->avg_signal); - for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) - ewma_signal_init(&sta->chain_signal_avg[i]); + sta->last_connected = ktime_get_seconds(); + ewma_signal_init(&sta->rx_stats.avg_signal); + for (i = 0; i < ARRAY_SIZE(sta->rx_stats.chain_signal_avg); i++) + ewma_signal_init(&sta->rx_stats.chain_signal_avg[i]); if (local->ops->wake_tx_queue) { void *txq_data; @@ -1068,7 +1066,7 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, if (sdata != sta->sdata) continue; - if (time_after(jiffies, sta->last_rx + exp_time)) { + if (time_after(jiffies, sta->rx_stats.last_rx + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); @@ -1808,12 +1806,50 @@ u8 sta_info_tx_streams(struct sta_info *sta) >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; } +static void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) +{ + rinfo->flags = 0; + + if (sta->rx_stats.last_rate_flag & RX_FLAG_HT) { + rinfo->flags |= RATE_INFO_FLAGS_MCS; + rinfo->mcs = sta->rx_stats.last_rate_idx; + } else if (sta->rx_stats.last_rate_flag & RX_FLAG_VHT) { + rinfo->flags |= RATE_INFO_FLAGS_VHT_MCS; + rinfo->nss = sta->rx_stats.last_rate_vht_nss; + rinfo->mcs = sta->rx_stats.last_rate_idx; + } else { + struct ieee80211_supported_band *sband; + int shift = ieee80211_vif_get_shift(&sta->sdata->vif); + u16 brate; + + sband = sta->local->hw.wiphy->bands[ + ieee80211_get_sdata_band(sta->sdata)]; + brate = sband->bitrates[sta->rx_stats.last_rate_idx].bitrate; + rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); + } + + if (sta->rx_stats.last_rate_flag & RX_FLAG_SHORT_GI) + rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; + + if (sta->rx_stats.last_rate_flag & RX_FLAG_5MHZ) + rinfo->bw = 
RATE_INFO_BW_5; + else if (sta->rx_stats.last_rate_flag & RX_FLAG_10MHZ) + rinfo->bw = RATE_INFO_BW_10; + else if (sta->rx_stats.last_rate_flag & RX_FLAG_40MHZ) + rinfo->bw = RATE_INFO_BW_40; + else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_80MHZ) + rinfo->bw = RATE_INFO_BW_80; + else if (sta->rx_stats.last_rate_vht_flag & RX_VHT_FLAG_160MHZ) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; +} + void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; - struct timespec uptime; u32 thr = 0; int i, ac; @@ -1835,51 +1871,54 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) BIT(NL80211_STA_INFO_STA_FLAGS) | BIT(NL80211_STA_INFO_BSS_PARAM) | BIT(NL80211_STA_INFO_CONNECTED_TIME) | - BIT(NL80211_STA_INFO_RX_DROP_MISC) | - BIT(NL80211_STA_INFO_BEACON_LOSS); + BIT(NL80211_STA_INFO_RX_DROP_MISC); + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + sinfo->beacon_loss_count = sdata->u.mgd.beacon_loss_count; + sinfo->filled |= BIT(NL80211_STA_INFO_BEACON_LOSS); + } - ktime_get_ts(&uptime); - sinfo->connected_time = uptime.tv_sec - sta->last_connected; - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); + sinfo->connected_time = ktime_get_seconds() - sta->last_connected; + sinfo->inactive_time = + jiffies_to_msecs(jiffies - sta->rx_stats.last_rx); if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | BIT(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_bytes += sta->tx_bytes[ac]; + sinfo->tx_bytes += sta->tx_stats.bytes[ac]; sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) - sinfo->tx_packets += sta->tx_packets[ac]; + sinfo->tx_packets += sta->tx_stats.packets[ac]; sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | BIT(NL80211_STA_INFO_RX_BYTES)))) { - sinfo->rx_bytes = sta->rx_bytes; + sinfo->rx_bytes = sta->rx_stats.bytes; sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { - sinfo->rx_packets = sta->rx_packets; + sinfo->rx_packets = sta->rx_stats.packets; sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { - sinfo->tx_retries = sta->tx_retry_count; + sinfo->tx_retries = sta->status_stats.retry_count; sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { - sinfo->tx_failed = sta->tx_retry_failed; + sinfo->tx_failed = sta->status_stats.retry_failed; sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); } - sinfo->rx_dropped_misc = sta->rx_dropped; - sinfo->beacon_loss_count = sta->beacon_loss_count; + sinfo->rx_dropped_misc = sta->rx_stats.dropped; if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { @@ -1891,33 +1930,35 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { - sinfo->signal = (s8)sta->last_signal; + sinfo->signal = (s8)sta->rx_stats.last_signal; sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } if (!(sinfo->filled & 
BIT(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = - (s8) -ewma_signal_read(&sta->avg_signal); + -ewma_signal_read(&sta->rx_stats.avg_signal); sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } } - if (sta->chains && + if (sta->rx_stats.chains && !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); - sinfo->chains = sta->chains; + sinfo->chains = sta->rx_stats.chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { - sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal[i] = + sta->rx_stats.chain_signal_last[i]; sinfo->chain_signal_avg[i] = - (s8) -ewma_signal_read(&sta->chain_signal_avg[i]); + -ewma_signal_read(&sta->rx_stats.chain_signal_avg[i]); } } if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, + &sinfo->txrate); sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); } @@ -1932,12 +1973,12 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); - tidstats->rx_msdu = sta->rx_msdu[i]; + tidstats->rx_msdu = sta->rx_stats.msdu[i]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); - tidstats->tx_msdu = sta->tx_msdu[i]; + tidstats->tx_msdu = sta->tx_stats.msdu[i]; } if (!(tidstats->filled & @@ -1945,7 +1986,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); - tidstats->tx_msdu_retries = sta->tx_msdu_retries[i]; + tidstats->tx_msdu_retries = + sta->status_stats.msdu_retries[i]; } if (!(tidstats->filled & @@ -1953,7 +1995,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); - tidstats->tx_msdu_failed = sta->tx_msdu_failed[i]; + tidstats->tx_msdu_failed = + sta->status_stats.msdu_failed[i]; } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b087c71ff7fe..2cafb21b422f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -133,6 +133,7 @@ enum ieee80211_agg_stop_reason { * @buf_size: reorder buffer size at receiver * @failed_bar_ssn: ssn of the last failed BAR tx attempt * @bar_pending: BAR needs to be re-sent + * @amsdu: support A-MSDU withing A-MDPU * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. 
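
The counter renames running through rx.c, status.c and sta_set_sinfo() all serve the sta_info reorganisation in the hunk below: statistics are regrouped into substructs by the one path allowed to update them, so the locking rule lives on the struct rather than in scattered per-field comments. Condensed shape, field lists abbreviated for illustration:

    /* condensed sketch of the regrouped sta_info; full layout in the
     * sta_info.h hunk that follows */
    struct sta_info_sketch {
        struct {                /* updated from RX path only */
            unsigned long packets, dropped, last_rx;
        } rx_stats;
        struct {                /* updated from TX status path only */
            unsigned long retry_count, lost_packets;
        } status_stats;
        struct {                /* updated from TX path only */
            unsigned long packets, bytes;
        } tx_stats;
    };
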
@@ -158,6 +159,7 @@ struct tid_ampdu_tx { u16 failed_bar_ssn; bool bar_pending; + bool amsdu; }; /** @@ -342,12 +344,6 @@ DECLARE_EWMA(signal, 1024, 8) * @rate_ctrl_lock: spinlock used to protect rate control data * (data inside the algorithm, so serializes calls there) * @rate_ctrl_priv: rate control private per-STA pointer - * @last_tx_rate: rate used for last transmit, to report to userspace as - * "the" transmit rate - * @last_rx_rate_idx: rx status rate index of the last data packet - * @last_rx_rate_flag: rx status flag of the last data packet - * @last_rx_rate_vht_flag: rx status vht flag of the last data packet - * @last_rx_rate_vht_nss: rx status nss of last data packet * @lock: used for locking all fields that require locking, see comments * in the header file. * @drv_deliver_wk: used for delivering frames after driver PS unblocking @@ -362,23 +358,9 @@ DECLARE_EWMA(signal, 1024, 8) * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on - * @rx_packets: Number of MSDUs received from this STA - * @rx_bytes: Number of bytes received from this STA - * @last_rx: time (in jiffies) when last frame was received from this STA * @last_connected: time (in seconds) when a station got connected - * @num_duplicates: number of duplicate frames received from this STA - * @rx_fragments: number of received MPDUs - * @rx_dropped: number of dropped MPDUs from this STA - * @last_signal: signal of last received frame from this STA - * @avg_signal: moving average of signal of received frames from this STA - * @last_ack_signal: signal of last received Ack frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per TID * plus one for non-QoS frames) - * @tx_filtered_count: number of frames the hardware filtered for this STA - * @tx_retry_failed: number of frames that failed retry - * @tx_retry_count: total number of retries for frames to this STA - * @tx_packets: number of RX/TX MSDUs - * @tx_bytes: number of bytes transmitted to this STA * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers @@ -386,32 +368,22 @@ DECLARE_EWMA(signal, 1024, 8) * @debugfs: debug filesystem info * @dead: set to true when sta is unlinked * @uploaded: set to true when sta is uploaded to the driver - * @lost_packets: number of consecutive lost packets * @sta: station information we share with the driver * @sta_state: duplicates information about station state (for debug) * @beacon_loss_count: number of times beacon loss has triggered * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification - * @chains: chains ever used for RX from this station - * @chain_signal_last: last signal (per chain) - * @chain_signal_avg: signal average (per chain) * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. 
* @cipher_scheme: optional cipher scheme for this station - * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed * @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED) - * @tx_msdu: MSDUs transmitted to this station, using IEEE80211_NUM_TID - * entry for non-QoS frames - * @tx_msdu_retries: MSDU retries for transmissions to to this station, - * using IEEE80211_NUM_TID entry for non-QoS frames - * @tx_msdu_failed: MSDU failures for transmissions to to this station, - * using IEEE80211_NUM_TID entry for non-QoS frames - * @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID - * entry for non-QoS frames * @fast_tx: TX fastpath information * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. + * @tx_stats: TX statistics + * @rx_stats: RX statistics + * @status_stats: TX status statistics */ struct sta_info { /* General information, mostly static */ @@ -455,42 +427,49 @@ struct sta_info { unsigned long driver_buffered_tids; unsigned long txq_buffered_tids; - /* Updated from RX path only, no locking requirements */ - unsigned long rx_packets; - u64 rx_bytes; - unsigned long last_rx; long last_connected; - unsigned long num_duplicates; - unsigned long rx_fragments; - unsigned long rx_dropped; - int last_signal; - struct ewma_signal avg_signal; - int last_ack_signal; - u8 chains; - s8 chain_signal_last[IEEE80211_MAX_CHAINS]; - struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; + /* Updated from RX path only, no locking requirements */ + struct { + unsigned long packets; + u64 bytes; + unsigned long last_rx; + unsigned long num_duplicates; + unsigned long fragments; + unsigned long dropped; + int last_signal; + struct ewma_signal avg_signal; + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS]; + int last_rate_idx; + u32 last_rate_flag; + u32 last_rate_vht_flag; + u8 last_rate_vht_nss; + u64 msdu[IEEE80211_NUM_TIDS + 1]; + } rx_stats; /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; /* Updated from TX status path only, no locking requirements */ - unsigned long tx_filtered_count; - unsigned long tx_retry_failed, tx_retry_count; + struct { + unsigned long filtered; + unsigned long retry_failed, retry_count; + unsigned int lost_packets; + unsigned long last_tdls_pkt_time; + u64 msdu_retries[IEEE80211_NUM_TIDS + 1]; + u64 msdu_failed[IEEE80211_NUM_TIDS + 1]; + } status_stats; /* Updated from TX path only, no locking requirements */ - u64 tx_packets[IEEE80211_NUM_ACS]; - u64 tx_bytes[IEEE80211_NUM_ACS]; - struct ieee80211_tx_rate last_tx_rate; - int last_rx_rate_idx; - u32 last_rx_rate_flag; - u32 last_rx_rate_vht_flag; - u8 last_rx_rate_vht_nss; + struct { + u64 packets[IEEE80211_NUM_ACS]; + u64 bytes[IEEE80211_NUM_ACS]; + struct ieee80211_tx_rate last_rate; + u64 msdu[IEEE80211_NUM_TIDS + 1]; + } tx_stats; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; - u64 tx_msdu[IEEE80211_NUM_TIDS + 1]; - u64 tx_msdu_retries[IEEE80211_NUM_TIDS + 1]; - u64 tx_msdu_failed[IEEE80211_NUM_TIDS + 1]; - u64 rx_msdu[IEEE80211_NUM_TIDS + 1]; /* * Aggregation information, locked with lock. 
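
sta_set_sinfo() above follows the cfg80211 convention throughout: sinfo->filled is a bitmap of NL80211_STA_INFO_* flags, drivers may pre-fill fields from firmware, and mac80211 computes only what is still missing. The idiom, reduced to a standalone example (the enum names are stand-ins for the nl80211 ones):

    #include <stdint.h>
    #include <stdio.h>

    #define BIT(n) (1U << (n))
    enum { STA_INFO_TX_BYTES, STA_INFO_SIGNAL };   /* stand-ins */

    struct station_info { uint32_t filled; uint64_t tx_bytes; int signal; };

    static void fill_sinfo(struct station_info *sinfo)
    {
        if (!(sinfo->filled & BIT(STA_INFO_TX_BYTES))) {  /* driver didn't set it */
            sinfo->tx_bytes = 1234;
            sinfo->filled |= BIT(STA_INFO_TX_BYTES);
        }
    }

    int main(void)
    {
        struct station_info sinfo = { .filled = 0 };

        fill_sinfo(&sinfo);
        printf("filled=0x%x tx_bytes=%llu\n", sinfo.filled,
               (unsigned long long)sinfo.tx_bytes);
        return 0;
    }
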
@@ -507,15 +486,9 @@ struct sta_info { enum ieee80211_sta_rx_bandwidth cur_max_bandwidth; - unsigned int lost_packets; - unsigned int beacon_loss_count; - enum ieee80211_smps_mode known_smps_mode; const struct ieee80211_cipher_scheme *cipher_scheme; - /* TDLS timeout data */ - unsigned long last_tdls_pkt_time; - u8 reserved_tid; struct cfg80211_chan_def tdls_chandef; @@ -686,8 +659,6 @@ static inline int sta_info_flush(struct ieee80211_sub_if_data *sdata) void sta_set_rate_info_tx(struct sta_info *sta, const struct ieee80211_tx_rate *rate, struct rate_info *rinfo); -void sta_set_rate_info_rx(struct sta_info *sta, - struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 3ed7ddfbf8e8..5bad05e9af90 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -67,7 +67,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, IEEE80211_TX_INTFL_RETRANSMISSION; info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; - sta->tx_filtered_count++; + sta->status_stats.filtered++; /* * Clear more-data bit on filtered frames, it might be set @@ -183,7 +183,7 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_sub_if_data *sdata = sta->sdata; if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; @@ -557,8 +557,9 @@ static void ieee80211_lost_packet(struct sta_info *sta, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - sta->lost_packets++; - if (!sta->sta.tdls && sta->lost_packets < STA_LOST_PKT_THRESHOLD) + sta->status_stats.lost_packets++; + if (!sta->sta.tdls && + sta->status_stats.lost_packets < STA_LOST_PKT_THRESHOLD) return; /* @@ -568,14 +569,15 @@ static void ieee80211_lost_packet(struct sta_info *sta, * mechanism. 
*/ if (sta->sta.tdls && - (sta->lost_packets < STA_LOST_TDLS_PKT_THRESHOLD || + (sta->status_stats.lost_packets < STA_LOST_TDLS_PKT_THRESHOLD || time_before(jiffies, - sta->last_tdls_pkt_time + STA_LOST_TDLS_PKT_TIME))) + sta->status_stats.last_tdls_pkt_time + + STA_LOST_TDLS_PKT_TIME))) return; cfg80211_cqm_pktloss_notify(sta->sdata->dev, sta->sta.addr, - sta->lost_packets, GFP_ATOMIC); - sta->lost_packets = 0; + sta->status_stats.lost_packets, GFP_ATOMIC); + sta->status_stats.lost_packets = 0; } static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, @@ -636,18 +638,18 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, sta = container_of(pubsta, struct sta_info, sta); if (!acked) - sta->tx_retry_failed++; - sta->tx_retry_count += retry_count; + sta->status_stats.retry_failed++; + sta->status_stats.retry_count += retry_count; if (acked) { - sta->last_rx = jiffies; + sta->rx_stats.last_rx = jiffies; - if (sta->lost_packets) - sta->lost_packets = 0; + if (sta->status_stats.lost_packets) + sta->status_stats.lost_packets = 0; /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) - sta->last_tdls_pkt_time = jiffies; + sta->status_stats.last_tdls_pkt_time = jiffies; } else { ieee80211_lost_packet(sta, info); } @@ -669,16 +671,70 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_tx_status_noskb); -void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, + struct ieee80211_supported_band *sband, + int retry_count, int shift, bool send_to_cooked) { struct sk_buff *skb2; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sub_if_data *sdata; + struct net_device *prev_dev = NULL; + int rtap_len; + + /* send frame to monitor interfaces now */ + rtap_len = ieee80211_tx_radiotap_len(info); + if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) { + pr_err("ieee80211_tx_status: headroom too small\n"); + dev_kfree_skb(skb); + return; + } + ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count, + rtap_len, shift); + + /* XXX: is this sufficient for BPF? 
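Splitting the delivery out into ieee80211_tx_monitor() means the radiotap construction is written once, and the beacon path in tx.c can reuse it later in this diff instead of duplicating the loop. The up-front headroom test is the usual defensive pattern before prepending anything to an skb (a sketch; hdr_len stands for the radiotap length computed above):

#include <linux/skbuff.h>

static int prepend_header(struct sk_buff *skb, unsigned int hdr_len)
{
	/* drop the frame instead of corrupting memory if the
	 * originator left too little headroom for the new header
	 */
	if (WARN_ON_ONCE(skb_headroom(skb) < hdr_len)) {
		dev_kfree_skb(skb);
		return -ENOMEM;
	}

	skb_push(skb, hdr_len);	/* caller fills hdr_len bytes at skb->data */
	return 0;
}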
*/ + skb_set_mac_header(skb, 0); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_OTHERHOST; + skb->protocol = htons(ETH_P_802_2); + memset(skb->cb, 0, sizeof(skb->cb)); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (!ieee80211_sdata_running(sdata)) + continue; + + if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && + !send_to_cooked) + continue; + + if (prev_dev) { + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + skb2->dev = prev_dev; + netif_rx(skb2); + } + } + + prev_dev = sdata->dev; + } + } + if (prev_dev) { + skb->dev = prev_dev; + netif_rx(skb); + skb = NULL; + } + rcu_read_unlock(); + dev_kfree_skb(skb); +} + +void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +{ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); __le16 fc; struct ieee80211_supported_band *sband; - struct ieee80211_sub_if_data *sdata; - struct net_device *prev_dev = NULL; struct sta_info *sta; struct rhash_head *tmp; int retry_count; @@ -686,7 +742,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) bool send_to_cooked; bool acked; struct ieee80211_bar *bar; - int rtap_len; int shift = 0; int tid = IEEE80211_NUM_TIDS; const struct bucket_table *tbl; @@ -731,7 +786,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) - sta->last_tx_rate = info->status.rates[rates_idx]; + sta->tx_stats.last_rate = + info->status.rates[rates_idx]; if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && (ieee80211_is_data_qos(fc))) { @@ -777,13 +833,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } else { if (!acked) - sta->tx_retry_failed++; - sta->tx_retry_count += retry_count; + sta->status_stats.retry_failed++; + sta->status_stats.retry_count += retry_count; if (ieee80211_is_data_present(fc)) { if (!acked) - sta->tx_msdu_failed[tid]++; - sta->tx_msdu_retries[tid] += retry_count; + sta->status_stats.msdu_failed[tid]++; + + sta->status_stats.msdu_retries[tid] += + retry_count; } } @@ -801,19 +859,17 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { if (info->flags & IEEE80211_TX_STAT_ACK) { - if (sta->lost_packets) - sta->lost_packets = 0; + if (sta->status_stats.lost_packets) + sta->status_stats.lost_packets = 0; /* Track when last TDLS packet was ACKed */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) - sta->last_tdls_pkt_time = jiffies; + sta->status_stats.last_tdls_pkt_time = + jiffies; } else { ieee80211_lost_packet(sta, info); } } - - if (acked) - sta->last_ack_signal = info->status.ack_signal; } rcu_read_unlock(); @@ -879,51 +935,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } - /* send frame to monitor interfaces now */ - rtap_len = ieee80211_tx_radiotap_len(info); - if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) { - pr_err("ieee80211_tx_status: headroom too small\n"); - dev_kfree_skb(skb); - return; - } - ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count, - rtap_len, shift); - - /* XXX: is this sufficient for BPF? 
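The delivery loop itself is a common kernel idiom for fanning one skb out to N receivers: clone for every interface except the last, then hand the original to the last one, so only N-1 copies are ever allocated. The same shape in isolation (the monitor list and its wrapper type are made up for the example):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct mon_if {
	struct list_head list;
	struct net_device *dev;
};

static void fan_out(struct sk_buff *skb, struct list_head *monitors)
{
	struct net_device *prev = NULL;
	struct mon_if *m;

	list_for_each_entry(m, monitors, list) {
		if (prev) {
			struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);

			if (copy) {
				copy->dev = prev;
				netif_rx(copy);
			}
		}
		prev = m->dev;	/* deferred: m may turn out to be last */
	}

	if (prev) {
		skb->dev = prev;
		netif_rx(skb);	/* the original goes to the final receiver */
		skb = NULL;
	}
	dev_kfree_skb(skb);	/* frees only if no receiver consumed it */
}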
*/ - skb_set_mac_header(skb, 0); - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->pkt_type = PACKET_OTHERHOST; - skb->protocol = htons(ETH_P_802_2); - memset(skb->cb, 0, sizeof(skb->cb)); - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { - if (!ieee80211_sdata_running(sdata)) - continue; - - if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && - !send_to_cooked) - continue; - - if (prev_dev) { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2) { - skb2->dev = prev_dev; - netif_rx(skb2); - } - } - - prev_dev = sdata->dev; - } - } - if (prev_dev) { - skb->dev = prev_dev; - netif_rx(skb); - skb = NULL; - } - rcu_read_unlock(); - dev_kfree_skb(skb); + /* send to monitor interfaces */ + ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked); } EXPORT_SYMBOL(ieee80211_tx_status); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 4e202d0679b2..c9eeb3f12808 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -41,9 +41,11 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; bool chan_switch = local->hw.wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH; - bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW); + bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) && + !ifmgd->tdls_wider_bw_prohibited; enum ieee80211_band band = ieee80211_get_sdata_band(sdata); struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; bool vht = sband && sband->vht_cap.vht_supported; @@ -331,8 +333,8 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata, /* proceed to downgrade the chandef until usable or the same */ while (uc.width > max_width && - !cfg80211_reg_can_beacon(sdata->local->hw.wiphy, - &uc, sdata->wdev.iftype)) + !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc, + sdata->wdev.iftype)) ieee80211_chandef_downgrade(&uc); if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) { @@ -589,12 +591,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata, offset = noffset; } - /* if HT support is only added in TDLS, we need an HT-operation IE */ + /* + * if HT support is only added in TDLS, we need an HT-operation IE. + * add the IE as required by IEEE802.11-2012 9.23.3.2. + */ if (!ap_sta->sta.ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) { + u16 prot = IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | + IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | + IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; + pos = skb_put(skb, 2 + sizeof(struct ieee80211_ht_operation)); - /* send an empty HT operation IE */ ieee80211_ie_build_ht_oper(pos, &sta->sta.ht_cap, - &sdata->vif.bss_conf.chandef, 0); + &sdata->vif.bss_conf.chandef, prot, + true); } ieee80211_tdls_add_link_ie(sdata, skb, peer, initiator); diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 6f14591d8ca9..56c6d6cfa5a1 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -33,11 +33,11 @@ __field(u32, chan_width) \ __field(u32, center_freq1) \ __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ - __entry->chan_width = (c)->width; \ - __entry->center_freq1 = (c)->center_freq1; \ - __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c) ? ((c)->chan ? 
(c)->chan->center_freq : 0) : 0; \ + __entry->chan_width = (c) ? (c)->width : 0; \ + __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; #define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" #define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 @@ -325,7 +325,6 @@ TRACE_EVENT(drv_config, __field(u32, flags) __field(int, power_level) __field(int, dynamic_ps_timeout) - __field(int, max_sleep_period) __field(u16, listen_interval) __field(u8, long_frame_max_tx_count) __field(u8, short_frame_max_tx_count) @@ -339,7 +338,6 @@ TRACE_EVENT(drv_config, __entry->flags = local->hw.conf.flags; __entry->power_level = local->hw.conf.power_level; __entry->dynamic_ps_timeout = local->hw.conf.dynamic_ps_timeout; - __entry->max_sleep_period = local->hw.conf.max_sleep_period; __entry->listen_interval = local->hw.conf.listen_interval; __entry->long_frame_max_tx_count = local->hw.conf.long_frame_max_tx_count; @@ -497,6 +495,36 @@ TRACE_EVENT(drv_configure_filter, ) ); +TRACE_EVENT(drv_config_iface_filter, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + unsigned int filter_flags, + unsigned int changed_flags), + + TP_ARGS(local, sdata, filter_flags, changed_flags), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(unsigned int, filter_flags) + __field(unsigned int, changed_flags) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->filter_flags = filter_flags; + __entry->changed_flags = changed_flags; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + " filter_flags: %#x changed_flags: %#x", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->filter_flags, + __entry->changed_flags + ) +); + TRACE_EVENT(drv_set_tim, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, bool set), @@ -944,9 +972,9 @@ TRACE_EVENT(drv_ampdu_action, struct ieee80211_sub_if_data *sdata, enum ieee80211_ampdu_mlme_action action, struct ieee80211_sta *sta, u16 tid, - u16 *ssn, u8 buf_size), + u16 *ssn, u8 buf_size, bool amsdu), - TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size), + TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size, amsdu), TP_STRUCT__entry( LOCAL_ENTRY @@ -955,6 +983,7 @@ TRACE_EVENT(drv_ampdu_action, __field(u16, tid) __field(u16, ssn) __field(u8, buf_size) + __field(bool, amsdu) VIF_ENTRY ), @@ -966,12 +995,13 @@ TRACE_EVENT(drv_ampdu_action, __entry->tid = tid; __entry->ssn = ssn ? 
*ssn : 0; __entry->buf_size = buf_size; + __entry->amsdu = amsdu; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d", + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d amsdu:%d", LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action, - __entry->tid, __entry->buf_size + __entry->tid, __entry->buf_size, __entry->amsdu ) ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7892eb8ed4c8..bdc224d5053a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -757,9 +757,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) if (txrc.reported_rate.idx < 0) { txrc.reported_rate = tx->rate; if (tx->sta && ieee80211_is_data(hdr->frame_control)) - tx->sta->last_tx_rate = txrc.reported_rate; + tx->sta->tx_stats.last_rate = txrc.reported_rate; } else if (tx->sta) - tx->sta->last_tx_rate = txrc.reported_rate; + tx->sta->tx_stats.last_rate = txrc.reported_rate; if (ratetbl) return TX_CONTINUE; @@ -824,7 +824,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) hdr->seq_ctrl = cpu_to_le16(tx->sdata->sequence_number); tx->sdata->sequence_number += 0x10; if (tx->sta) - tx->sta->tx_msdu[IEEE80211_NUM_TIDS]++; + tx->sta->tx_stats.msdu[IEEE80211_NUM_TIDS]++; return TX_CONTINUE; } @@ -840,7 +840,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - tx->sta->tx_msdu[tid]++; + tx->sta->tx_stats.msdu[tid]++; if (!tx->sta->sta.txq[0]) hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); @@ -994,10 +994,10 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) skb_queue_walk(&tx->skbs, skb) { ac = skb_get_queue_mapping(skb); - tx->sta->tx_bytes[ac] += skb->len; + tx->sta->tx_stats.bytes[ac] += skb->len; } if (ac >= 0) - tx->sta->tx_packets[ac]++; + tx->sta->tx_stats.packets[ac]++; return TX_CONTINUE; } @@ -2770,7 +2770,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { *ieee80211_get_qos_ctl(hdr) = tid; - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + if (!sta->sta.txq[0]) + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); } else { info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); @@ -2778,10 +2779,10 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } if (skb_shinfo(skb)->gso_size) - sta->tx_msdu[tid] += + sta->tx_stats.msdu[tid] += DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); else - sta->tx_msdu[tid]++; + sta->tx_stats.msdu[tid]++; info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; @@ -2812,8 +2813,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, /* statistics normally done by ieee80211_tx_h_stats (but that * has to consider fragmentation, so is more complex) */ - sta->tx_bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_packets[skb_get_queue_mapping(skb)]++; + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; if (fast_tx->pn_offs) { u64 pn; @@ -3515,6 +3516,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, { struct ieee80211_mutable_offsets offs = {}; struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false); + struct sk_buff *copy; + struct ieee80211_supported_band *sband; + int shift; + + if (!bcn) + return bcn; if (tim_offset) *tim_offset = offs.tim_offset; @@ -3522,6 +3529,19 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (tim_length) 
*tim_length = offs.tim_length; + if (ieee80211_hw_check(hw, BEACON_TX_STATUS) || + !hw_to_local(hw)->monitors) + return bcn; + + /* send a copy to monitor interfaces */ + copy = skb_copy(bcn, GFP_ATOMIC); + if (!copy) + return bcn; + + shift = ieee80211_vif_get_shift(vif); + sband = hw->wiphy->bands[ieee80211_get_sdata_band(vif_to_sdata(vif))]; + ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false); + return bcn; } EXPORT_SYMBOL(ieee80211_beacon_get_tim); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1104421bc525..74058020b7d6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2015 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1104,13 +1105,13 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, - bool bss_notify) + bool bss_notify, bool enable_qos) { struct ieee80211_local *local = sdata->local; struct ieee80211_tx_queue_params qparam; struct ieee80211_chanctx_conf *chanctx_conf; int ac; - bool use_11b, enable_qos; + bool use_11b; bool is_ocb; /* Use another EDCA parameters if dot11OCBActivated=true */ int aCWmin, aCWmax; @@ -1129,13 +1130,6 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); rcu_read_unlock(); - /* - * By default disable QoS in STA mode for old access points, which do - * not support 802.11e. New APs will provide proper queue parameters, - * that we will configure later. - */ - enable_qos = (sdata->vif.type != NL80211_IFTYPE_STATION); - is_ocb = (sdata->vif.type == NL80211_IFTYPE_OCB); /* Set defaults according to 802.11-2007 Table 7-37 */ @@ -1664,7 +1658,6 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local) local->resuming = false; local->suspended = false; - local->started = false; local->in_reconfig = false; /* scheduled scan clearly can't be running any more, but tell @@ -1754,6 +1747,18 @@ int ieee80211_reconfig(struct ieee80211_local *local) #endif /* + * In case of hw_restart during suspend (without wowlan), + * cancel restart work, as we are reconfiguring the device + * anyway. + * Note that restart_work is scheduled on a frozen workqueue, + * so we can't deadlock in this case. + */ + if (suspended && local->in_reconfig && !reconfig_due_to_wowlan) + cancel_work_sync(&local->restart_work); + + local->started = false; + + /* * Upon resume hardware can sometimes be goofy due to * various platform / driver / bus issues, so restarting * the device may at times not work immediately. Propagate @@ -1951,7 +1956,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) } } - ieee80211_recalc_ps(local, -1); + ieee80211_recalc_ps(local); /* * The sta might be in psm against the ap (e.g. 
because @@ -1966,7 +1971,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (!sdata->u.mgd.associated) continue; - ieee80211_send_nullfunc(local, sdata, 0); + ieee80211_send_nullfunc(local, sdata, false); } } @@ -1996,6 +2001,29 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (ieee80211_sdata_running(sdata)) ieee80211_enable_keys(sdata); + /* Reconfigure sched scan if it was interrupted by FW restart */ + mutex_lock(&local->mtx); + sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, + lockdep_is_held(&local->mtx)); + sched_scan_req = rcu_dereference_protected(local->sched_scan_req, + lockdep_is_held(&local->mtx)); + if (sched_scan_sdata && sched_scan_req) + /* + * Sched scan stopped, but we don't want to report it. Instead, + * we're trying to reschedule. However, if more than one scan + * plan was set, we cannot reschedule since we don't know which + * scan plan was currently running (and some scan plans may have + * already finished). + */ + if (sched_scan_req->n_scan_plans > 1 || + __ieee80211_request_sched_scan_start(sched_scan_sdata, + sched_scan_req)) + sched_scan_stopped = true; + mutex_unlock(&local->mtx); + + if (sched_scan_stopped) + cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); + wake_up: local->in_reconfig = false; barrier(); @@ -2017,8 +2045,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) mutex_lock(&local->sta_mtx); list_for_each_entry(sta, &local->sta_list, list) { - ieee80211_sta_tear_down_BA_sessions( - sta, AGG_STOP_LOCAL_REQUEST); + if (!local->resuming) + ieee80211_sta_tear_down_BA_sessions( + sta, AGG_STOP_LOCAL_REQUEST); clear_sta_flag(sta, WLAN_STA_BLOCK_BA); } @@ -2030,28 +2059,6 @@ int ieee80211_reconfig(struct ieee80211_local *local) false); /* - * Reconfigure sched scan if it was interrupted by FW restart or - * suspend. - */ - mutex_lock(&local->mtx); - sched_scan_sdata = rcu_dereference_protected(local->sched_scan_sdata, - lockdep_is_held(&local->mtx)); - sched_scan_req = rcu_dereference_protected(local->sched_scan_req, - lockdep_is_held(&local->mtx)); - if (sched_scan_sdata && sched_scan_req) - /* - * Sched scan stopped, but we don't want to report it. Instead, - * we're trying to reschedule. - */ - if (__ieee80211_request_sched_scan_start(sched_scan_sdata, - sched_scan_req)) - sched_scan_stopped = true; - mutex_unlock(&local->mtx); - - if (sched_scan_stopped) - cfg80211_sched_scan_stopped_rtnl(local->hw.wiphy); - - /* * If this is for hw restart things are still running. * We may want to change that later, however. */ @@ -2135,7 +2142,13 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata) chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); - if (WARN_ON_ONCE(!chanctx_conf)) + /* + * This function can be called from a work, thus it may be possible + * that the chanctx_conf is removed (due to a disconnection, for + * example). + * So nothing should be done in such case. 
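Replacing the WARN_ON_ONCE with a quiet bail-out records that this is a legitimate race rather than a driver bug: deferred work can run after a disconnection has already released the channel context. The defensive shape is typical for work items that dereference lock-protected RCU state (all names below are illustrative):

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

struct chan_conf {
	int width;
};

struct ctx_holder {
	struct work_struct work;
	struct mutex mtx;
	struct chan_conf __rcu *conf;
};

static void recalc_work(struct work_struct *work)
{
	struct ctx_holder *h = container_of(work, struct ctx_holder, work);
	struct chan_conf *conf;

	mutex_lock(&h->mtx);
	conf = rcu_dereference_protected(h->conf, lockdep_is_held(&h->mtx));
	if (!conf)
		goto out;	/* raced with disconnect: nothing to do */

	/* ... recalculate using conf ... */
out:
	mutex_unlock(&h->mtx);
}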
+ */ + if (!chanctx_conf) goto unlock; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, conf); @@ -2272,7 +2285,7 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, const struct cfg80211_chan_def *chandef, - u16 prot_mode) + u16 prot_mode, bool rifs_mode) { struct ieee80211_ht_operation *ht_oper; /* Build HT Information */ @@ -2300,6 +2313,9 @@ u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, chandef->width != NL80211_CHAN_WIDTH_20) ht_oper->ht_param |= IEEE80211_HT_PARAM_CHAN_WIDTH_ANY; + if (rifs_mode) + ht_oper->ht_param |= IEEE80211_HT_PARAM_RIFS_MODE; + ht_oper->operation_mode = cpu_to_le16(prot_mode); ht_oper->stbc_param = 0x0000; @@ -2324,6 +2340,8 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, if (chandef->center_freq2) vht_oper->center_freq_seg2_idx = ieee80211_frequency_to_channel(chandef->center_freq2); + else + vht_oper->center_freq_seg2_idx = 0x00; switch (chandef->width) { case NL80211_CHAN_WIDTH_160: @@ -2541,7 +2559,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif) /* non-managed type inferfaces */ return 0; } - return ifmgd->ave_beacon_signal / 16; + return -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal); } EXPORT_SYMBOL_GPL(ieee80211_ave_rssi); @@ -2951,6 +2969,13 @@ ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) if (end > 0) return false; + /* One shot NOA */ + if (data->count[i] == 1) + return false; + + if (data->desc[i].interval == 0) + return false; + /* End time is in the past, check for repetitions */ skip = DIV_ROUND_UP(-end, data->desc[i].interval); if (data->count[i] < 255) { @@ -3298,9 +3323,11 @@ void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata, if (sta) { txqi->txq.sta = &sta->sta; sta->sta.txq[tid] = &txqi->txq; + txqi->txq.tid = tid; txqi->txq.ac = ieee802_1d_to_ac[tid & 7]; } else { sdata->vif.txq = &txqi->txq; + txqi->txq.tid = 0; txqi->txq.ac = IEEE80211_AC_BE; } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index feb547dc8643..d824c38971ed 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -174,9 +174,12 @@ mic_fail_no_key: * a driver that supports HW encryption. Send up the key idx only if * the key is set. */ - mac80211_ev_michael_mic_failure(rx->sdata, - rx->key ? rx->key->conf.keyidx : -1, - (void *) skb->data, NULL, GFP_ATOMIC); + cfg80211_michael_mic_failure(rx->sdata->dev, hdr->addr2, + is_multicast_ether_addr(hdr->addr1) ? + NL80211_KEYTYPE_GROUP : + NL80211_KEYTYPE_PAIRWISE, + rx->key ? 
rx->key->conf.keyidx : -1, + NULL, GFP_ATOMIC); return RX_DROP_UNUSABLE; } diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index c865ebb2ace2..57b5e94471af 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -266,6 +266,195 @@ ieee802154_set_ackreq_default(struct wpan_phy *wpan_phy, return 0; } +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL +static void +ieee802154_get_llsec_table(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_llsec_table **table) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + + *table = &sdata->sec.table; +} + +static void +ieee802154_lock_llsec_table(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + + mutex_lock(&sdata->sec_mtx); +} + +static void +ieee802154_unlock_llsec_table(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + + mutex_unlock(&sdata->sec_mtx); +} + +static int +ieee802154_set_llsec_params(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_params *params, + int changed) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_set_params(&sdata->sec, params, changed); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_get_llsec_params(struct wpan_phy *wpan_phy, + struct wpan_dev *wpan_dev, + struct ieee802154_llsec_params *params) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_get_params(&sdata->sec, params); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_add_llsec_key(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_key_id *id, + const struct ieee802154_llsec_key *key) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_key_add(&sdata->sec, id, key); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_del_llsec_key(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_key_id *id) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_key_del(&sdata->sec, id); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_add_seclevel(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_seclevel_add(&sdata->sec, sl); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_del_seclevel(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_seclevel *sl) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + 
int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_seclevel_del(&sdata->sec, sl); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_add_device(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + const struct ieee802154_llsec_device *dev_desc) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_dev_add(&sdata->sec, dev_desc); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_del_device(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + __le64 extended_addr) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_dev_del(&sdata->sec, extended_addr); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_add_devkey(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + __le64 extended_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_devkey_add(&sdata->sec, extended_addr, key); + mutex_unlock(&sdata->sec_mtx); + + return res; +} + +static int +ieee802154_del_devkey(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + __le64 extended_addr, + const struct ieee802154_llsec_device_key *key) +{ + struct net_device *dev = wpan_dev->netdev; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + int res; + + mutex_lock(&sdata->sec_mtx); + res = mac802154_llsec_devkey_del(&sdata->sec, extended_addr, key); + mutex_unlock(&sdata->sec_mtx); + + return res; +} +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ + const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf_deprecated = ieee802154_add_iface_deprecated, .del_virtual_intf_deprecated = ieee802154_del_iface_deprecated, @@ -284,4 +473,20 @@ const struct cfg802154_ops mac802154_config_ops = { .set_max_frame_retries = ieee802154_set_max_frame_retries, .set_lbt_mode = ieee802154_set_lbt_mode, .set_ackreq_default = ieee802154_set_ackreq_default, +#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL + .get_llsec_table = ieee802154_get_llsec_table, + .lock_llsec_table = ieee802154_lock_llsec_table, + .unlock_llsec_table = ieee802154_unlock_llsec_table, + /* TODO above */ + .set_llsec_params = ieee802154_set_llsec_params, + .get_llsec_params = ieee802154_get_llsec_params, + .add_llsec_key = ieee802154_add_llsec_key, + .del_llsec_key = ieee802154_del_llsec_key, + .add_seclevel = ieee802154_add_seclevel, + .del_seclevel = ieee802154_del_seclevel, + .add_device = ieee802154_add_device, + .del_device = ieee802154_del_device, + .add_devkey = ieee802154_add_devkey, + .del_devkey = ieee802154_del_devkey, +#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */ }; diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index ed26952f9e14..7079cd32a7ad 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -367,12 +367,11 @@ static int mac802154_set_header_security(struct ieee802154_sub_if_data *sdata, return 0; } -static int mac802154_header_create(struct sk_buff *skb, - struct net_device *dev, - unsigned short type, - const void *daddr, - const void *saddr, - unsigned len) +static int ieee802154_header_create(struct sk_buff *skb, + struct net_device *dev, + const 
struct ieee802154_addr *daddr, + const struct ieee802154_addr *saddr, + unsigned len) { struct ieee802154_hdr hdr; struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); @@ -423,24 +422,89 @@ static int mac802154_header_create(struct sk_buff *skb, return hlen; } +static const struct wpan_dev_header_ops ieee802154_header_ops = { + .create = ieee802154_header_create, +}; + +/* This header create functionality assumes a 8 byte array for + * source and destination pointer at maximum. To adapt this for + * the 802.15.4 dataframe header we use extended address handling + * here only and intra pan connection. fc fields are mostly fallback + * handling. For provide dev_hard_header for dgram sockets. + */ +static int mac802154_header_create(struct sk_buff *skb, + struct net_device *dev, + unsigned short type, + const void *daddr, + const void *saddr, + unsigned len) +{ + struct ieee802154_hdr hdr; + struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); + struct wpan_dev *wpan_dev = &sdata->wpan_dev; + struct ieee802154_mac_cb cb = { }; + int hlen; + + if (!daddr) + return -EINVAL; + + memset(&hdr.fc, 0, sizeof(hdr.fc)); + hdr.fc.type = IEEE802154_FC_TYPE_DATA; + hdr.fc.ack_request = wpan_dev->ackreq; + hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF; + + /* TODO currently a workaround to give zero cb block to set + * security parameters defaults according MIB. + */ + if (mac802154_set_header_security(sdata, &hdr, &cb) < 0) + return -EINVAL; + + hdr.dest.pan_id = wpan_dev->pan_id; + hdr.dest.mode = IEEE802154_ADDR_LONG; + ieee802154_be64_to_le64(&hdr.dest.extended_addr, daddr); + + hdr.source.pan_id = hdr.dest.pan_id; + hdr.source.mode = IEEE802154_ADDR_LONG; + + if (!saddr) + hdr.source.extended_addr = wpan_dev->extended_addr; + else + ieee802154_be64_to_le64(&hdr.source.extended_addr, saddr); + + hlen = ieee802154_hdr_push(skb, &hdr); + if (hlen < 0) + return -EINVAL; + + skb_reset_mac_header(skb); + skb->mac_len = hlen; + + if (len > ieee802154_max_payload(&hdr)) + return -EMSGSIZE; + + return hlen; +} + static int mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr) { struct ieee802154_hdr hdr; - struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) { pr_debug("malformed packet\n"); return 0; } - *addr = hdr.source; - return sizeof(*addr); + if (hdr.source.mode == IEEE802154_ADDR_LONG) { + ieee802154_le64_to_be64(haddr, &hdr.source.extended_addr); + return IEEE802154_EXTENDED_ADDR_LEN; + } + + return 0; } -static struct header_ops mac802154_header_ops = { - .create = mac802154_header_create, - .parse = mac802154_header_parse, +static const struct header_ops mac802154_header_ops = { + .create = mac802154_header_create, + .parse = mac802154_header_parse, }; static const struct net_device_ops mac802154_wpan_ops = { @@ -471,9 +535,29 @@ static void ieee802154_if_setup(struct net_device *dev) dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN; memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN); - dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN; - dev->needed_tailroom = 2 + 16; /* FCS + MIC */ - dev->mtu = IEEE802154_MTU; + /* Let hard_header_len set to IEEE802154_MIN_HEADER_LEN. AF_PACKET + * will not send frames without any payload, but ack frames + * has no payload, so substract one that we can send a 3 bytes + * frame. The xmit callback assumes at least a hard header where two + * bytes fc and sequence field are set. 
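The split into two create routines above exists because dev_hard_header() can only pass opaque byte arrays: the mac802154_header_create() fallback therefore supports nothing richer than an 8-byte big-endian extended address inside the local PAN, while in-stack 6LoWPAN code uses the wpan_dev op with a full struct ieee802154_addr. A dgram-style caller of the generic path would look roughly like this (the peer address is invented, and the protocol type argument is ignored by this fallback, as the code above shows):

#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_build_header(struct sk_buff *skb, struct net_device *dev)
{
	/* peer's EUI-64, big-endian as the fallback expects */
	static const u8 daddr[8] = { 0x00, 0x12, 0x4b, 0x00,
				     0x01, 0x02, 0x03, 0x04 };

	/* saddr == NULL selects the interface's own extended address */
	return dev_hard_header(skb, dev, 0, daddr, NULL, skb->len);
}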
+ */ + dev->hard_header_len = IEEE802154_MIN_HEADER_LEN - 1; + /* The auth_tag header is for security and places in private payload + * room of mac frame which stucks between payload and FCS field. + */ + dev->needed_tailroom = IEEE802154_MAX_AUTH_TAG_LEN + + IEEE802154_FCS_LEN; + /* The mtu size is the payload without mac header in this case. + * We have a dynamic length header with a minimum header length + * which is hard_header_len. In this case we let mtu to the size + * of maximum payload which is IEEE802154_MTU - IEEE802154_FCS_LEN - + * hard_header_len. The FCS which is set by hardware or ndo_start_xmit + * and the minimum mac header which can be evaluated inside driver + * layer. The rest of mac header will be part of payload if greater + * than hard_header_len. + */ + dev->mtu = IEEE802154_MTU - IEEE802154_FCS_LEN - + dev->hard_header_len; dev->tx_queue_len = 300; dev->flags = IFF_NOARP | IFF_BROADCAST; } @@ -513,6 +597,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, sdata->dev->netdev_ops = &mac802154_wpan_ops; sdata->dev->ml_priv = &mac802154_mlme_wpan; wpan_dev->promiscuous_mode = false; + wpan_dev->header_ops = &ieee802154_header_ops; mutex_init(&sdata->sec_mtx); @@ -550,7 +635,8 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, if (!ndev) return ERR_PTR(-ENOMEM); - ndev->needed_headroom = local->hw.extra_tx_headroom; + ndev->needed_headroom = local->hw.extra_tx_headroom + + IEEE802154_MAX_HEADER_LEN; ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 985e9394e2af..a13d02b7cee4 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -55,7 +55,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec) msl = container_of(sl, struct mac802154_llsec_seclevel, level); list_del(&sl->list); - kfree(msl); + kzfree(msl); } list_for_each_entry_safe(dev, dn, &sec->table.devices, list) { @@ -72,7 +72,7 @@ void mac802154_llsec_destroy(struct mac802154_llsec *sec) mkey = container_of(key->key, struct mac802154_llsec_key, key); list_del(&key->list); llsec_key_put(mkey); - kfree(key); + kzfree(key); } } @@ -161,7 +161,7 @@ err_tfm: if (key->tfm[i]) crypto_free_aead(key->tfm[i]); - kfree(key); + kzfree(key); return NULL; } @@ -176,7 +176,7 @@ static void llsec_key_release(struct kref *ref) crypto_free_aead(key->tfm[i]); crypto_free_blkcipher(key->tfm0); - kfree(key); + kzfree(key); } static struct mac802154_llsec_key* @@ -267,7 +267,7 @@ int mac802154_llsec_key_add(struct mac802154_llsec *sec, return 0; fail: - kfree(new); + kzfree(new); return -ENOMEM; } @@ -347,10 +347,10 @@ static void llsec_dev_free(struct mac802154_llsec_device *dev) devkey); list_del(&pos->list); - kfree(devkey); + kzfree(devkey); } - kfree(dev); + kzfree(dev); } int mac802154_llsec_dev_add(struct mac802154_llsec *sec, @@ -401,6 +401,7 @@ int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr) hash_del_rcu(&pos->bucket_s); hash_del_rcu(&pos->bucket_hw); + list_del_rcu(&pos->dev.list); call_rcu(&pos->rcu, llsec_dev_free_rcu); return 0; @@ -680,7 +681,7 @@ llsec_do_encrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, rc = crypto_aead_encrypt(req); - kfree(req); + kzfree(req); return rc; } @@ -880,7 +881,7 @@ llsec_do_decrypt_auth(struct sk_buff *skb, const struct mac802154_llsec *sec, rc = crypto_aead_decrypt(req); - kfree(req); + kzfree(req); skb_trim(skb, skb->len - authlen); return rc; @@ -920,7 +921,7 @@ llsec_update_devkey_record(struct mac802154_llsec_device 
*dev, if (!devkey) list_add_rcu(&next->devkey.list, &dev->dev.keys); else - kfree(next); + kzfree(next); spin_unlock_bh(&dev->lock); } diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index d1c33c1d6b9b..42e96729dae6 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -87,6 +87,10 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, skb->dev = sdata->dev; + /* TODO this should be moved after netif_receive_skb call, otherwise + * wireshark will show a mac header with security fields and the + * payload is already decrypted. + */ rc = mac802154_llsec_decrypt(&sdata->sec, skb); if (rc) { pr_debug("decryption failed: %i\n", rc); diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 7ed439172f30..3827f359b336 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -77,9 +77,6 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb) put_unaligned_le16(crc, skb_put(skb, 2)); } - if (skb_cow_head(skb, local->hw.extra_tx_headroom)) - goto err_tx; - /* Stop the netif queue on each sub_if_data object. */ ieee802154_stop_queue(&local->hw); @@ -121,6 +118,10 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev); int rc; + /* TODO we should move it to wpan_dev_hard_header and dev_hard_header + * functions. The reason is wireshark will show a mac header which is + * with security fields but the payload is not encrypted. + */ rc = mac802154_llsec_encrypt(&sdata->sec, skb); if (rc) { netdev_warn(dev, "encryption failed: %i\n", rc); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index bb185a28de98..c70d750148b6 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -19,36 +19,13 @@ #include <net/ipv6.h> #include <net/addrconf.h> #endif +#include <net/nexthop.h> #include "internal.h" -#define LABEL_NOT_SPECIFIED (1<<20) -#define MAX_NEW_LABELS 2 - -/* This maximum ha length copied from the definition of struct neighbour */ -#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))) - -enum mpls_payload_type { - MPT_UNSPEC, /* IPv4 or IPv6 */ - MPT_IPV4 = 4, - MPT_IPV6 = 6, - - /* Other types not implemented: - * - Pseudo-wire with or without control word (RFC4385) - * - GAL (RFC5586) - */ -}; - -struct mpls_route { /* next hop label forwarding entry */ - struct net_device __rcu *rt_dev; - struct rcu_head rt_rcu; - u32 rt_label[MAX_NEW_LABELS]; - u8 rt_protocol; /* routing protocol that set this entry */ - u8 rt_payload_type; - u8 rt_labels; - u8 rt_via_alen; - u8 rt_via_table; - u8 rt_via[0]; -}; +/* Maximum number of labels to look ahead at when selecting a path of + * a multipath route + */ +#define MAX_MP_SELECT_LABELS 4 static int zero = 0; static int label_limit = (1 << 20) - 1; @@ -80,10 +57,24 @@ bool mpls_output_possible(const struct net_device *dev) } EXPORT_SYMBOL_GPL(mpls_output_possible); -static unsigned int mpls_rt_header_size(const struct mpls_route *rt) +static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh) +{ + u8 *nh0_via = PTR_ALIGN((u8 *)&rt->rt_nh[rt->rt_nhn], VIA_ALEN_ALIGN); + int nh_index = nh - rt->rt_nh; + + return nh0_via + rt->rt_max_alen * nh_index; +} + +static const u8 *mpls_nh_via(const struct mpls_route *rt, + const struct mpls_nh *nh) +{ + return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh); +} + +static unsigned int mpls_nh_header_size(const struct mpls_nh *nh) { /* The size of the layer 2.5 labels to be added for this route */ - return rt->rt_labels * sizeof(struct mpls_shim_hdr); + return 
nh->nh_labels * sizeof(struct mpls_shim_hdr); } unsigned int mpls_dev_mtu(const struct net_device *dev) @@ -105,6 +96,80 @@ bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) } EXPORT_SYMBOL_GPL(mpls_pkt_too_big); +static struct mpls_nh *mpls_select_multipath(struct mpls_route *rt, + struct sk_buff *skb, bool bos) +{ + struct mpls_entry_decoded dec; + struct mpls_shim_hdr *hdr; + bool eli_seen = false; + int label_index; + int nh_index = 0; + u32 hash = 0; + + /* No need to look further into packet if there's only + * one path + */ + if (rt->rt_nhn == 1) + goto out; + + for (label_index = 0; label_index < MAX_MP_SELECT_LABELS && !bos; + label_index++) { + if (!pskb_may_pull(skb, sizeof(*hdr) * label_index)) + break; + + /* Read and decode the current label */ + hdr = mpls_hdr(skb) + label_index; + dec = mpls_entry_decode(hdr); + + /* RFC6790 - reserved labels MUST NOT be used as keys + * for the load-balancing function + */ + if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) { + hash = jhash_1word(dec.label, hash); + + /* The entropy label follows the entropy label + * indicator, so this means that the entropy + * label was just added to the hash - no need to + * go any deeper either in the label stack or in the + * payload + */ + if (eli_seen) + break; + } else if (dec.label == MPLS_LABEL_ENTROPY) { + eli_seen = true; + } + + bos = dec.bos; + if (bos && pskb_may_pull(skb, sizeof(*hdr) * label_index + + sizeof(struct iphdr))) { + const struct iphdr *v4hdr; + + v4hdr = (const struct iphdr *)(mpls_hdr(skb) + + label_index); + if (v4hdr->version == 4) { + hash = jhash_3words(ntohl(v4hdr->saddr), + ntohl(v4hdr->daddr), + v4hdr->protocol, hash); + } else if (v4hdr->version == 6 && + pskb_may_pull(skb, sizeof(*hdr) * label_index + + sizeof(struct ipv6hdr))) { + const struct ipv6hdr *v6hdr; + + v6hdr = (const struct ipv6hdr *)(mpls_hdr(skb) + + label_index); + + hash = __ipv6_addr_jhash(&v6hdr->saddr, hash); + hash = __ipv6_addr_jhash(&v6hdr->daddr, hash); + hash = jhash_1word(v6hdr->nexthdr, hash); + } + } + } + + nh_index = hash % rt->rt_nhn; +out: + return &rt->rt_nh[nh_index]; +} + static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb, struct mpls_entry_decoded dec) { @@ -159,6 +224,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct net *net = dev_net(dev); struct mpls_shim_hdr *hdr; struct mpls_route *rt; + struct mpls_nh *nh; struct mpls_entry_decoded dec; struct net_device *out_dev; struct mpls_dev *mdev; @@ -196,8 +262,12 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, if (!rt) goto drop; + nh = mpls_select_multipath(rt, skb, dec.bos); + if (!nh) + goto drop; + /* Find the output device */ - out_dev = rcu_dereference(rt->rt_dev); + out_dev = rcu_dereference(nh->nh_dev); if (!mpls_output_possible(out_dev)) goto drop; @@ -212,7 +282,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, dec.ttl -= 1; /* Verify the destination can hold the packet */ - new_header_size = mpls_rt_header_size(rt); + new_header_size = mpls_nh_header_size(nh); mtu = mpls_dev_mtu(out_dev); if (mpls_pkt_too_big(skb, mtu - new_header_size)) goto drop; @@ -240,13 +310,14 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Push the new labels */ hdr = mpls_hdr(skb); bos = dec.bos; - for (i = rt->rt_labels - 1; i >= 0; i--) { - hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos); + for (i = nh->nh_labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(nh->nh_label[i], + dec.ttl, 0, 
bos); bos = false; } } - err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb); + err = neigh_xmit(nh->nh_via_table, out_dev, mpls_nh_via(rt, nh), skb); if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); @@ -270,24 +341,33 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = { struct mpls_route_config { u32 rc_protocol; u32 rc_ifindex; - u16 rc_via_table; - u16 rc_via_alen; + u8 rc_via_table; + u8 rc_via_alen; u8 rc_via[MAX_VIA_ALEN]; u32 rc_label; - u32 rc_output_labels; + u8 rc_output_labels; u32 rc_output_label[MAX_NEW_LABELS]; u32 rc_nlflags; enum mpls_payload_type rc_payload_type; struct nl_info rc_nlinfo; + struct rtnexthop *rc_mp; + int rc_mp_len; }; -static struct mpls_route *mpls_rt_alloc(size_t alen) +static struct mpls_route *mpls_rt_alloc(int num_nh, u8 max_alen) { + u8 max_alen_aligned = ALIGN(max_alen, VIA_ALEN_ALIGN); struct mpls_route *rt; - rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL); - if (rt) - rt->rt_via_alen = alen; + rt = kzalloc(ALIGN(sizeof(*rt) + num_nh * sizeof(*rt->rt_nh), + VIA_ALEN_ALIGN) + + num_nh * max_alen_aligned, + GFP_KERNEL); + if (rt) { + rt->rt_nhn = num_nh; + rt->rt_max_alen = max_alen_aligned; + } + return rt; } @@ -312,25 +392,22 @@ static void mpls_notify_route(struct net *net, unsigned index, } static void mpls_route_update(struct net *net, unsigned index, - struct net_device *dev, struct mpls_route *new, + struct mpls_route *new, const struct nl_info *info) { struct mpls_route __rcu **platform_label; - struct mpls_route *rt, *old = NULL; + struct mpls_route *rt; ASSERT_RTNL(); platform_label = rtnl_dereference(net->mpls.platform_label); rt = rtnl_dereference(platform_label[index]); - if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) { - rcu_assign_pointer(platform_label[index], new); - old = rt; - } + rcu_assign_pointer(platform_label[index], new); - mpls_notify_route(net, index, old, new, info); + mpls_notify_route(net, index, rt, new, info); /* If we removed a route free it now */ - mpls_rt_free(old); + mpls_rt_free(rt); } static unsigned find_free_label(struct net *net) @@ -350,7 +427,8 @@ static unsigned find_free_label(struct net *net) } #if IS_ENABLED(CONFIG_INET) -static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet_fib_lookup_dev(struct net *net, + const void *addr) { struct net_device *dev; struct rtable *rt; @@ -369,14 +447,16 @@ static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) return dev; } #else -static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet_fib_lookup_dev(struct net *net, + const void *addr) { return ERR_PTR(-EAFNOSUPPORT); } #endif #if IS_ENABLED(CONFIG_IPV6) -static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet6_fib_lookup_dev(struct net *net, + const void *addr) { struct net_device *dev; struct dst_entry *dst; @@ -399,47 +479,234 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) return dev; } #else -static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr) +static struct net_device *inet6_fib_lookup_dev(struct net *net, + const void *addr) { return ERR_PTR(-EAFNOSUPPORT); } #endif static struct net_device *find_outdev(struct net *net, - struct mpls_route_config *cfg) + struct mpls_route *rt, + struct mpls_nh *nh, int oif) { struct net_device *dev = NULL; - if (!cfg->rc_ifindex) { - switch (cfg->rc_via_table) { + if (!oif) { + 
switch (nh->nh_via_table) { case NEIGH_ARP_TABLE: - dev = inet_fib_lookup_dev(net, cfg->rc_via); + dev = inet_fib_lookup_dev(net, mpls_nh_via(rt, nh)); break; case NEIGH_ND_TABLE: - dev = inet6_fib_lookup_dev(net, cfg->rc_via); + dev = inet6_fib_lookup_dev(net, mpls_nh_via(rt, nh)); break; case NEIGH_LINK_TABLE: break; } } else { - dev = dev_get_by_index(net, cfg->rc_ifindex); + dev = dev_get_by_index(net, oif); } if (!dev) return ERR_PTR(-ENODEV); + /* The caller is holding rtnl anyways, so release the dev reference */ + dev_put(dev); + return dev; } +static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt, + struct mpls_nh *nh, int oif) +{ + struct net_device *dev = NULL; + int err = -ENODEV; + + dev = find_outdev(net, rt, nh, oif); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + dev = NULL; + goto errout; + } + + /* Ensure this is a supported device */ + err = -EINVAL; + if (!mpls_dev_get(dev)) + goto errout; + + RCU_INIT_POINTER(nh->nh_dev, dev); + + return 0; + +errout: + return err; +} + +static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg, + struct mpls_route *rt) +{ + struct net *net = cfg->rc_nlinfo.nl_net; + struct mpls_nh *nh = rt->rt_nh; + int err; + int i; + + if (!nh) + return -ENOMEM; + + err = -EINVAL; + /* Ensure only a supported number of labels are present */ + if (cfg->rc_output_labels > MAX_NEW_LABELS) + goto errout; + + nh->nh_labels = cfg->rc_output_labels; + for (i = 0; i < nh->nh_labels; i++) + nh->nh_label[i] = cfg->rc_output_label[i]; + + nh->nh_via_table = cfg->rc_via_table; + memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen); + nh->nh_via_alen = cfg->rc_via_alen; + + err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex); + if (err) + goto errout; + + return 0; + +errout: + return err; +} + +static int mpls_nh_build(struct net *net, struct mpls_route *rt, + struct mpls_nh *nh, int oif, + struct nlattr *via, struct nlattr *newdst) +{ + int err = -ENOMEM; + + if (!nh) + goto errout; + + if (newdst) { + err = nla_get_labels(newdst, MAX_NEW_LABELS, + &nh->nh_labels, nh->nh_label); + if (err) + goto errout; + } + + err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table, + __mpls_nh_via(rt, nh)); + if (err) + goto errout; + + err = mpls_nh_assign_dev(net, rt, nh, oif); + if (err) + goto errout; + + return 0; + +errout: + return err; +} + +static int mpls_count_nexthops(struct rtnexthop *rtnh, int len, + u8 cfg_via_alen, u8 *max_via_alen) +{ + int nhs = 0; + int remaining = len; + + if (!rtnh) { + *max_via_alen = cfg_via_alen; + return 1; + } + + *max_via_alen = 0; + + while (rtnh_ok(rtnh, remaining)) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + int attrlen; + + attrlen = rtnh_attrlen(rtnh); + nla = nla_find(attrs, attrlen, RTA_VIA); + if (nla && nla_len(nla) >= + offsetof(struct rtvia, rtvia_addr)) { + int via_alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + + if (via_alen <= MAX_VIA_ALEN) + *max_via_alen = max_t(u16, *max_via_alen, + via_alen); + } + + nhs++; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 
0 : nhs; +} + +static int mpls_nh_build_multi(struct mpls_route_config *cfg, + struct mpls_route *rt) +{ + struct rtnexthop *rtnh = cfg->rc_mp; + struct nlattr *nla_via, *nla_newdst; + int remaining = cfg->rc_mp_len; + int nhs = 0; + int err = 0; + + change_nexthops(rt) { + int attrlen; + + nla_via = NULL; + nla_newdst = NULL; + + err = -EINVAL; + if (!rtnh_ok(rtnh, remaining)) + goto errout; + + /* neither weighted multipath nor any flags + * are supported + */ + if (rtnh->rtnh_hops || rtnh->rtnh_flags) + goto errout; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *attrs = rtnh_attrs(rtnh); + + nla_via = nla_find(attrs, attrlen, RTA_VIA); + nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST); + } + + if (!nla_via) + goto errout; + + err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh, + rtnh->rtnh_ifindex, nla_via, + nla_newdst); + if (err) + goto errout; + + rtnh = rtnh_next(rtnh, &remaining); + nhs++; + } endfor_nexthops(rt); + + rt->rt_nhn = nhs; + + return 0; + +errout: + return err; +} + static int mpls_route_add(struct mpls_route_config *cfg) { struct mpls_route __rcu **platform_label; struct net *net = cfg->rc_nlinfo.nl_net; - struct net_device *dev = NULL; struct mpls_route *rt, *old; - unsigned index; - int i; int err = -EINVAL; + u8 max_via_alen; + unsigned index; + int nhs; index = cfg->rc_label; @@ -457,27 +724,6 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (index >= net->mpls.platform_labels) goto errout; - /* Ensure only a supported number of labels are present */ - if (cfg->rc_output_labels > MAX_NEW_LABELS) - goto errout; - - dev = find_outdev(net, cfg); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); - dev = NULL; - goto errout; - } - - /* Ensure this is a supported device */ - err = -EINVAL; - if (!mpls_dev_get(dev)) - goto errout; - - err = -EINVAL; - if ((cfg->rc_via_table == NEIGH_LINK_TABLE) && - (dev->addr_len != cfg->rc_via_alen)) - goto errout; - /* Append makes no sense with mpls */ err = -EOPNOTSUPP; if (cfg->rc_nlflags & NLM_F_APPEND) @@ -497,28 +743,34 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old) goto errout; + err = -EINVAL; + nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len, + cfg->rc_via_alen, &max_via_alen); + if (nhs == 0) + goto errout; + err = -ENOMEM; - rt = mpls_rt_alloc(cfg->rc_via_alen); + rt = mpls_rt_alloc(nhs, max_via_alen); if (!rt) goto errout; - rt->rt_labels = cfg->rc_output_labels; - for (i = 0; i < rt->rt_labels; i++) - rt->rt_label[i] = cfg->rc_output_label[i]; rt->rt_protocol = cfg->rc_protocol; - RCU_INIT_POINTER(rt->rt_dev, dev); rt->rt_payload_type = cfg->rc_payload_type; - rt->rt_via_table = cfg->rc_via_table; - memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen); - mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo); + if (cfg->rc_mp) + err = mpls_nh_build_multi(cfg, rt); + else + err = mpls_nh_build_from_cfg(cfg, rt); + if (err) + goto freert; + + mpls_route_update(net, index, rt, &cfg->rc_nlinfo); - dev_put(dev); return 0; +freert: + mpls_rt_free(rt); errout: - if (dev) - dev_put(dev); return err; } @@ -538,7 +790,7 @@ static int mpls_route_del(struct mpls_route_config *cfg) if (index >= net->mpls.platform_labels) goto errout; - mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo); + mpls_route_update(net, index, NULL, &cfg->rc_nlinfo); err = 0; errout: @@ -635,9 +887,11 @@ static void mpls_ifdown(struct net_device *dev) struct mpls_route *rt = rtnl_dereference(platform_label[index]); if (!rt) continue; - if 
(rtnl_dereference(rt->rt_dev) != dev) - continue; - rt->rt_dev = NULL; + for_nexthops(rt) { + if (rtnl_dereference(nh->nh_dev) != dev) + continue; + nh->nh_dev = NULL; + } endfor_nexthops(rt); } mdev = mpls_dev_get(dev); @@ -736,7 +990,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype, EXPORT_SYMBOL_GPL(nla_put_labels); int nla_get_labels(const struct nlattr *nla, - u32 max_labels, u32 *labels, u32 label[]) + u32 max_labels, u8 *labels, u32 label[]) { unsigned len = nla_len(nla); unsigned nla_labels; @@ -781,6 +1035,48 @@ int nla_get_labels(const struct nlattr *nla, } EXPORT_SYMBOL_GPL(nla_get_labels); +int nla_get_via(const struct nlattr *nla, u8 *via_alen, + u8 *via_table, u8 via_addr[]) +{ + struct rtvia *via = nla_data(nla); + int err = -EINVAL; + int alen; + + if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) + goto errout; + alen = nla_len(nla) - + offsetof(struct rtvia, rtvia_addr); + if (alen > MAX_VIA_ALEN) + goto errout; + + /* Validate the address family */ + switch (via->rtvia_family) { + case AF_PACKET: + *via_table = NEIGH_LINK_TABLE; + break; + case AF_INET: + *via_table = NEIGH_ARP_TABLE; + if (alen != 4) + goto errout; + break; + case AF_INET6: + *via_table = NEIGH_ND_TABLE; + if (alen != 16) + goto errout; + break; + default: + /* Unsupported address family */ + goto errout; + } + + memcpy(via_addr, via->rtvia_addr, alen); + *via_alen = alen; + err = 0; + +errout: + return err; +} + static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, struct mpls_route_config *cfg) { @@ -844,7 +1140,7 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, break; case RTA_DST: { - u32 label_count; + u8 label_count; if (nla_get_labels(nla, 1, &label_count, &cfg->rc_label)) goto errout; @@ -857,35 +1153,15 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh, } case RTA_VIA: { - struct rtvia *via = nla_data(nla); - if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) - goto errout; - cfg->rc_via_alen = nla_len(nla) - - offsetof(struct rtvia, rtvia_addr); - if (cfg->rc_via_alen > MAX_VIA_ALEN) + if (nla_get_via(nla, &cfg->rc_via_alen, + &cfg->rc_via_table, cfg->rc_via)) goto errout; - - /* Validate the address family */ - switch(via->rtvia_family) { - case AF_PACKET: - cfg->rc_via_table = NEIGH_LINK_TABLE; - break; - case AF_INET: - cfg->rc_via_table = NEIGH_ARP_TABLE; - if (cfg->rc_via_alen != 4) - goto errout; - break; - case AF_INET6: - cfg->rc_via_table = NEIGH_ND_TABLE; - if (cfg->rc_via_alen != 16) - goto errout; - break; - default: - /* Unsupported address family */ - goto errout; - } - - memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen); + break; + } + case RTA_MULTIPATH: + { + cfg->rc_mp = nla_data(nla); + cfg->rc_mp_len = nla_len(nla); break; } default: @@ -946,16 +1222,52 @@ static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event, rtm->rtm_type = RTN_UNICAST; rtm->rtm_flags = 0; - if (rt->rt_labels && - nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label)) - goto nla_put_failure; - if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen)) - goto nla_put_failure; - dev = rtnl_dereference(rt->rt_dev); - if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) - goto nla_put_failure; if (nla_put_labels(skb, RTA_DST, 1, &label)) goto nla_put_failure; + if (rt->rt_nhn == 1) { + const struct mpls_nh *nh = rt->rt_nh; + + if (nh->nh_labels && + nla_put_labels(skb, RTA_NEWDST, nh->nh_labels, + nh->nh_label)) + goto nla_put_failure; + if (nla_put_via(skb, 
nh->nh_via_table, mpls_nh_via(rt, nh), + nh->nh_via_alen)) + goto nla_put_failure; + dev = rtnl_dereference(nh->nh_dev); + if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex)) + goto nla_put_failure; + } else { + struct rtnexthop *rtnh; + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + for_nexthops(rt) { + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (!rtnh) + goto nla_put_failure; + + dev = rtnl_dereference(nh->nh_dev); + if (dev) + rtnh->rtnh_ifindex = dev->ifindex; + if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST, + nh->nh_labels, + nh->nh_label)) + goto nla_put_failure; + if (nla_put_via(skb, nh->nh_via_table, + mpls_nh_via(rt, nh), + nh->nh_via_alen)) + goto nla_put_failure; + + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + } endfor_nexthops(rt); + + nla_nest_end(skb, mp); + } nlmsg_end(skb, nlh); return 0; @@ -1000,12 +1312,30 @@ static inline size_t lfib_nlmsg_size(struct mpls_route *rt) { size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) - + nla_total_size(2 + rt->rt_via_alen) /* RTA_VIA */ + nla_total_size(4); /* RTA_DST */ - if (rt->rt_labels) /* RTA_NEWDST */ - payload += nla_total_size(rt->rt_labels * 4); - if (rt->rt_dev) /* RTA_OIF */ - payload += nla_total_size(4); + + if (rt->rt_nhn == 1) { + struct mpls_nh *nh = rt->rt_nh; + + if (nh->nh_dev) + payload += nla_total_size(4); /* RTA_OIF */ + payload += nla_total_size(2 + nh->nh_via_alen); /* RTA_VIA */ + if (nh->nh_labels) /* RTA_NEWDST */ + payload += nla_total_size(nh->nh_labels * 4); + } else { + /* each nexthop is packed in an attribute */ + size_t nhsize = 0; + + for_nexthops(rt) { + nhsize += nla_total_size(sizeof(struct rtnexthop)); + nhsize += nla_total_size(2 + nh->nh_via_alen); + if (nh->nh_labels) + nhsize += nla_total_size(nh->nh_labels * 4); + } endfor_nexthops(rt); + /* nested attribute */ + payload += nla_total_size(nhsize); + } + return payload; } @@ -1057,25 +1387,29 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* In case the predefined labels need to be populated */ if (limit > MPLS_LABEL_IPV4NULL) { struct net_device *lo = net->loopback_dev; - rt0 = mpls_rt_alloc(lo->addr_len); + rt0 = mpls_rt_alloc(1, lo->addr_len); if (!rt0) goto nort0; - RCU_INIT_POINTER(rt0->rt_dev, lo); + RCU_INIT_POINTER(rt0->rt_nh->nh_dev, lo); rt0->rt_protocol = RTPROT_KERNEL; rt0->rt_payload_type = MPT_IPV4; - rt0->rt_via_table = NEIGH_LINK_TABLE; - memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len); + rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE; + rt0->rt_nh->nh_via_alen = lo->addr_len; + memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr, + lo->addr_len); } if (limit > MPLS_LABEL_IPV6NULL) { struct net_device *lo = net->loopback_dev; - rt2 = mpls_rt_alloc(lo->addr_len); + rt2 = mpls_rt_alloc(1, lo->addr_len); if (!rt2) goto nort2; - RCU_INIT_POINTER(rt2->rt_dev, lo); + RCU_INIT_POINTER(rt2->rt_nh->nh_dev, lo); rt2->rt_protocol = RTPROT_KERNEL; rt2->rt_payload_type = MPT_IPV6; - rt2->rt_via_table = NEIGH_LINK_TABLE; - memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len); + rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE; + rt2->rt_nh->nh_via_alen = lo->addr_len; + memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr, + lo->addr_len); } rtnl_lock(); @@ -1085,7 +1419,7 @@ static int resize_platform_label_table(struct net *net, size_t limit) /* Free any labels beyond the new table */ for (index = limit; index < old_limit; index++) - mpls_route_update(net, index, NULL, NULL, NULL); + 
mpls_route_update(net, index, NULL, NULL); /* Copy over the old labels */ cp_size = size; diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 2681a4ba6c37..bde52ce88c94 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -21,6 +21,76 @@ struct mpls_dev { struct sk_buff; +#define LABEL_NOT_SPECIFIED (1 << 20) +#define MAX_NEW_LABELS 2 + +/* This maximum hardware address length is copied from the definition of struct neighbour */ +#define VIA_ALEN_ALIGN sizeof(unsigned long) +#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN)) + +enum mpls_payload_type { + MPT_UNSPEC, /* IPv4 or IPv6 */ + MPT_IPV4 = 4, + MPT_IPV6 = 6, + + /* Other types not implemented: + * - Pseudo-wire with or without control word (RFC4385) + * - GAL (RFC5586) + */ +}; + +struct mpls_nh { /* next hop label forwarding entry */ + struct net_device __rcu *nh_dev; + u32 nh_label[MAX_NEW_LABELS]; + u8 nh_labels; + u8 nh_via_alen; + u8 nh_via_table; +}; + +/* The route, nexthops and vias are stored together in the same memory + * block: + * + * +----------------------+ + * | mpls_route | + * +----------------------+ + * | mpls_nh 0 | + * +----------------------+ + * | ... | + * +----------------------+ + * | mpls_nh n-1 | + * +----------------------+ + * | alignment padding | + * +----------------------+ + * | via[rt_max_alen] 0 | + * +----------------------+ + * | ... | + * +----------------------+ + * | via[rt_max_alen] n-1 | + * +----------------------+ + */ +struct mpls_route { /* next hop label forwarding entry */ + struct rcu_head rt_rcu; + u8 rt_protocol; + u8 rt_payload_type; + u8 rt_max_alen; + unsigned int rt_nhn; + struct mpls_nh rt_nh[0]; +}; + +#define for_nexthops(rt) { \ + int nhsel; struct mpls_nh *nh; \ + for (nhsel = 0, nh = (rt)->rt_nh; \ + nhsel < (rt)->rt_nhn; \ + nh++, nhsel++) + +#define change_nexthops(rt) { \ + int nhsel; struct mpls_nh *nh; \ + for (nhsel = 0, nh = (struct mpls_nh *)((rt)->rt_nh); \ + nhsel < (rt)->rt_nhn; \ + nh++, nhsel++) + +#define endfor_nexthops(rt) } + static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { return (struct mpls_shim_hdr *)skb_network_header(skb); @@ -52,8 +122,10 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr * int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); -int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, +int nla_get_labels(const struct nlattr *nla, u32 max_labels, u8 *labels, u32 label[]); +int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table, + u8 via[]); bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 21e70bc9af98..67591aef9cae 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) return en->labels * sizeof(struct mpls_shim_hdr); } -int mpls_output(struct sock *sk, struct sk_buff *skb) +int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct mpls_iptunnel_encap *tun_encap_info; struct mpls_shim_hdr *hdr; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3e1b4abf1897..e22349ea7256 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -354,7 +354,7 @@ config NF_CT_NETLINK_HELPER select NETFILTER_NETLINK depends on NF_CT_NETLINK depends on NETFILTER_NETLINK_QUEUE - depends
on NETFILTER_NETLINK_QUEUE_CT + depends on NETFILTER_NETLINK_GLUE_CT depends on NETFILTER_ADVANCED help This option enables the user-space connection tracking helpers @@ -362,13 +362,14 @@ config NF_CT_NETLINK_HELPER If unsure, say `N'. -config NETFILTER_NETLINK_QUEUE_CT - bool "NFQUEUE integration with Connection Tracking" - default n - depends on NETFILTER_NETLINK_QUEUE +config NETFILTER_NETLINK_GLUE_CT + bool "NFQUEUE and NFLOG integration with Connection Tracking" + default n + depends on (NETFILTER_NETLINK_QUEUE || NETFILTER_NETLINK_LOG) && NF_CT_NETLINK help - If this option is enabled, NFQUEUE can include Connection Tracking - information together with the packet is the enqueued via NFNETLINK. + If this option is enabled, NFQUEUE and NFLOG can include + Connection Tracking information together with the packet if it + is enqueued via NFNETLINK. config NF_NAT tristate diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 70d026d46fe7..7638c36b498c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -10,8 +10,6 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o -nfnetlink_queue-y := nfnetlink_queue_core.o -nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 21a085686dc1..f39276d1c2d7 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -271,7 +271,7 @@ unsigned int nf_iterate(struct list_head *head, /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook(*elemp, skb, state); + verdict = (*elemp)->hook((*elemp)->priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) @@ -315,8 +315,6 @@ next_hook: int err = nf_queue(skb, elem, state, verdict >> NF_VERDICT_QBITS); if (err < 0) { - if (err == -ECANCELED) - goto next_hook; if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) goto next_hook; @@ -350,6 +348,12 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) } EXPORT_SYMBOL(skb_make_writable); +/* This needs to be compiled in any case to avoid dependencies between the + * nfnetlink_queue code and nf_conntrack. + */ +struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly; +EXPORT_SYMBOL_GPL(nfnl_ct_hook); + #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence @@ -387,9 +391,6 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) } EXPORT_SYMBOL(nf_conntrack_destroy); -struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly; -EXPORT_SYMBOL_GPL(nfq_ct_hook); - /* Built-in default zone used e.g. by modules. */ const struct nf_conntrack_zone nf_ct_zone_dflt = { .id = NF_CT_DEFAULT_ZONE_ID, diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 338b4047776f..69ab9c2634e1 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -519,8 +519,7 @@ int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( dev_net(par->in ?
par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret = 0; BUG_ON(!set); @@ -558,8 +557,7 @@ int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret; BUG_ON(!set); @@ -581,8 +579,7 @@ int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { - struct ip_set *set = ip_set_rcu_get( - dev_net(par->in ? par->in : par->out), index); + struct ip_set *set = ip_set_rcu_get(par->net, index); int ret = 0; BUG_ON(!set); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index dfd7b65b3d2a..0328f7250693 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head) * Allocate/initialize app incarnation and register it in proto apps. */ static int -ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, +ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { struct ip_vs_protocol *pp; @@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, } } - ret = pp->register_app(net, inc); + ret = pp->register_app(ipvs, inc); if (ret) goto out; @@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, * Release app incarnation */ static void -ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) +ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(net, inc); + pp->unregister_app(ipvs, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, +register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto, __u16 port) { int result; mutex_lock(&__ip_vs_app_mutex); - result = ip_vs_app_inc_new(net, app, proto, port); + result = ip_vs_app_inc_new(ipvs, app, proto, port); mutex_unlock(&__ip_vs_app_mutex); @@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, /* Register application for netns */ -struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app) +struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *a; int err = 0; - if (!ipvs) - return ERR_PTR(-ENOENT); - mutex_lock(&__ip_vs_app_mutex); list_for_each_entry(a, &ipvs->app_list, a_list) { @@ -230,21 +226,17 @@ out_unlock: * We are sure there are no app incarnations attached to services * Caller should use synchronize_rcu() or rcu_barrier() */ -void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) +void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *a, *anxt, *inc, *nxt; - if (!ipvs) - return; - mutex_lock(&__ip_vs_app_mutex); list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) { if (app && 
strcmp(app->name, a->name)) continue; list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) { - ip_vs_app_inc_release(net, inc); + ip_vs_app_inc_release(ipvs, inc); } list_del(&a->a_list); @@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __net_init ip_vs_app_net_init(struct net *net) +int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; INIT_LIST_HEAD(&ipvs->app_list); proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops); return 0; } -void __net_exit ip_vs_app_net_cleanup(struct net *net) +void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs) { - unregister_ip_vs_app(net, NULL /* all */); + struct net *net = ipvs->net; + + unregister_ip_vs_app(ipvs, NULL /* all */); remove_proc_entry("ip_vs_app", net->proc_net); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b0f7b626b56d..85ca189bdc3d 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto, +static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { @@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro if (af == AF_INET6) return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)net>>8)) & ip_vs_conn_tab_mask; + ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; #endif return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)net>>8)) & ip_vs_conn_tab_mask; + ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { @@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) continue; /* HIT */ @@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) } static int -ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, +ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs, + int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) + struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; - struct net *net = skb_net(skb); pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); if (pptr == NULL) return 1; - if (likely(!inverse)) - ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + if (likely(!ip_vs_iph_inverse(iph))) + ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr, 
pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr, pptr[1], &iph->saddr, pptr[0], p); return 0; } struct ip_vs_conn * -ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af, + const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p)) return NULL; return ip_vs_conn_in_get(&p); @@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (unlikely(p->pe_data && p->pe->ct_match)) { - if (!ip_vs_conn_net_eq(cp, p->net)) + if (cp->ipvs != p->ipvs) continue; if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { if (__ip_vs_conn_get(cp)) @@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) p->vport == cp->vport && p->cport == cp->cport && cp->flags & IP_VS_CONN_F_TEMPLATE && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (__ip_vs_conn_get(cp)) goto out; } @@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && p->protocol == cp->protocol && - ip_vs_conn_net_eq(cp, p->net)) { + cp->ipvs == p->ipvs) { if (!__ip_vs_conn_get(cp)) continue; /* HIT */ @@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) } struct ip_vs_conn * -ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, + const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn_param p; - if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p)) + if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p)) return NULL; return ip_vs_conn_out_get(&p); @@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) * so we can make the assumption that the svc_af is the same as the * dest_af */ - dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr, + dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark, cp->flags); if (dest) { @@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp) #endif ip_vs_bind_xmit(cp); - pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol); + pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol); if (pd && atomic_read(&pd->appcnt)) ip_vs_bind_app(cp, pd->pp); } @@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); + struct netns_ipvs *ipvs = ct->ipvs; /* * Checking the dest server status. @@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; - struct net *net = ip_vs_conn_net(cp); - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = cp->ipvs; /* * do I control anybody? 
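A note on the pattern running through the ip_vs_conn.c hunks above: rather than carrying a struct net pointer and re-deriving the per-namespace state with net_ipvs(skb_net(skb)) on every packet, each connection now keeps a direct cp->ipvs back-pointer, and the helpers take struct netns_ipvs * outright. A minimal user-space sketch of the idea follows; the types are simplified stand-ins, not the kernel's definitions.

/* Simplified stand-ins for the kernel structures (hypothetical). */
struct net { int id; };

struct netns_ipvs {
	struct net *net;          /* kept for the few proc/sysctl users */
	int enable;
};

struct ip_vs_conn {
	struct netns_ipvs *ipvs;  /* replaces the old ip_vs_conn_net(cp) */
};

/* Namespace membership becomes a single pointer comparison, matching
 * the cp->ipvs == p->ipvs tests above; no net_eq()/net_ipvs() hop.
 */
static int conn_in_ns(const struct ip_vs_conn *cp,
		      const struct netns_ipvs *ipvs)
{
	return cp->ipvs == ipvs;
}

The payoff is one less pointer chase on every fast-path lookup and stats update, and handlers that no longer need to derive the namespace from the skb, which the rest of this diff is busy dropping.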
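Stepping back to the net/mpls portion earlier in this diff: the new mpls_route keeps the nexthop array and the via addresses in a single allocation, as the ASCII diagram added to internal.h shows. A hedged user-space sketch of that layout arithmetic, loosely modelled on mpls_rt_alloc() and __mpls_nh_via(), with illustrative names throughout:

#include <stdlib.h>

#define VIA_ALIGN sizeof(unsigned long)           /* cf. VIA_ALEN_ALIGN */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct nh { unsigned char via_alen; };            /* stand-in mpls_nh */

struct rt {                                       /* stand-in mpls_route */
	unsigned int nhn;                         /* cf. rt_nhn */
	unsigned char max_alen;                   /* cf. rt_max_alen */
	struct nh nh[];                           /* then padding, then vias */
};

static struct rt *rt_alloc(unsigned int nhs, unsigned char max_alen)
{
	size_t alen = ALIGN_UP((size_t)max_alen, VIA_ALIGN);
	size_t sz = ALIGN_UP(sizeof(struct rt) + nhs * sizeof(struct nh),
			     VIA_ALIGN) + nhs * alen;
	struct rt *rt = calloc(1, sz);            /* kernel: kzalloc */

	if (rt) {
		rt->nhn = nhs;
		rt->max_alen = (unsigned char)alen;
	}
	return rt;
}

/* counterpart of __mpls_nh_via(): via slot i sits after the nh array */
static unsigned char *rt_via(struct rt *rt, unsigned int i)
{
	size_t off = ALIGN_UP(sizeof(struct rt) + rt->nhn * sizeof(struct nh),
			      VIA_ALIGN);

	return (unsigned char *)rt + off + (size_t)i * rt->max_alen;
}

One allocation means one rcu_head and one mpls_rt_free() for the whole multipath route, at the cost of sizing every via slot to the largest address in the set.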
@@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data) cp->timeout = 60*HZ; if (ipvs->sync_state & IP_VS_STATE_MASTER) - ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs)); + ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs)); ip_vs_conn_put(cp); } @@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct netns_ipvs *ipvs = net_ipvs(p->net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs, p->protocol); cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC); @@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, INIT_HLIST_NODE(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); - ip_vs_conn_net_set(cp, p->net); + cp->ipvs = ipvs; cp->af = p->af; cp->daf = dest_af; cp->protocol = p->protocol; @@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) size_t len = 0; char dbuf[IP_VS_ADDRSTRLEN]; - if (!ip_vs_conn_net_eq(cp, net)) + if (!net_eq(cp->ipvs->net, net)) return 0; if (cp->pe_data) { pe_data[0] = ' '; @@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) const struct ip_vs_conn *cp = v; struct net *net = seq_file_net(seq); - if (!ip_vs_conn_net_eq(cp, net)) + if (!net_eq(cp->ipvs->net, net)) return 0; #ifdef CONFIG_IP_VS_IPV6 @@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) } /* Called from keventd and must protect itself from softirqs */ -void ip_vs_random_dropentry(struct net *net) +void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { int idx; struct ip_vs_conn *cp, *cp_c; @@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net) if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; - if (!ip_vs_conn_net_eq(cp, net)) + if (cp->ipvs != ipvs) continue; if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { @@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(struct net *net) +static void ip_vs_conn_flush(struct netns_ipvs *ipvs) { int idx; struct ip_vs_conn *cp, *cp_c; - struct netns_ipvs *ipvs = net_ipvs(net); flush_again: rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (!ip_vs_conn_net_eq(cp, net)) + if (cp->ipvs != ipvs) continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); @@ -1345,23 +1345,22 @@ flush_again: /* * per netns init and exit */ -int __net_init ip_vs_conn_net_init(struct net *net) +int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - atomic_set(&ipvs->conn_count, 0); - proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops); - proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops); + proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops); + proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net, + &ip_vs_conn_sync_fops); return 0; } -void __net_exit ip_vs_conn_net_cleanup(struct net *net) +void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) { /* flush all the connection entries first */ - ip_vs_conn_flush(net); - remove_proc_entry("ip_vs_conn", net->proc_net); - remove_proc_entry("ip_vs_conn_sync", net->proc_net); + ip_vs_conn_flush(ipvs); + 
remove_proc_entry("ip_vs_conn", ipvs->net->proc_net); + remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net); } int __init ip_vs_conn_init(void) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 38fbc194b9cb..1e24fff53e4b 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -112,7 +112,7 @@ static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; @@ -146,7 +146,7 @@ static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + struct netns_ipvs *ipvs = cp->ipvs; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { struct ip_vs_cpu_stats *s; @@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; s = this_cpu_ptr(cp->dest->stats.cpustats); @@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr, vport, p); p->pe = rcu_dereference(svc->pe); if (p->pe && p->pe->fill_param) @@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc, const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; /* source network of the client, after masking */ + const union nf_inet_addr *src_addr, *dst_addr; + + if (likely(!ip_vs_iph_inverse(iph))) { + src_addr = &iph->saddr; + dst_addr = &iph->daddr; + } else { + src_addr = &iph->daddr; + dst_addr = &iph->saddr; + } + /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, + ipv6_addr_prefix(&snet.in6, &src_addr->in6, (__force __u32) svc->netmask); else #endif - snet.ip = iph->saddr.ip & svc->netmask; + snet.ip = src_addr->ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port), - IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port), + IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port), + IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); /* @@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ { int protocol = iph->protocol; - const union nf_inet_addr *vaddr = &iph->daddr; + const union nf_inet_addr *vaddr = dst_addr; __be16 vport = 0; if (dst_port == svc->port) { @@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr, - src_port, &iph->daddr, dst_port, ¶m); + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr, + src_port, dst_addr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, dest->af, &dest->addr, dport, flags, dest, skb->mark); @@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_conn *cp = NULL; struct ip_vs_scheduler 
*sched; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr; + __be16 _ports[2], *pptr, cport, vport; + const void *caddr, *vaddr; unsigned int flags; *ignored = 1; @@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, if (pptr == NULL) return NULL; + if (likely(!ip_vs_iph_inverse(iph))) { + cport = pptr[0]; + caddr = &iph->saddr; + vport = pptr[1]; + vaddr = &iph->daddr; + } else { + cport = pptr[1]; + caddr = &iph->daddr; + vport = pptr[0]; + vaddr = &iph->saddr; + } + /* * FTPDATA needs this check when using local real server. * Never schedule Active FTPDATA connections from real server. * For LVS-NAT they must be already created. For other methods * with persistence the connection is created on SYN+ACK. */ - if (pptr[0] == FTPDATA) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, + if (cport == FTPDATA) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, "Not scheduling FTPDATA"); return NULL; } @@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Do not schedule replies from local real server. */ - if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, iph, 1))) { - IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, - "Not scheduling reply for existing connection"); - __ip_vs_conn_put(cp); - return NULL; + if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) { + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph); + iph->hdr_flags ^= IP_VS_HDR_INVERSE; + + if (cp) { + IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off, + "Not scheduling reply for existing" + " connection"); + __ip_vs_conn_put(cp); + return NULL; + } } /* * Persistent service */ if (svc->flags & IP_VS_SVC_F_PERSISTENT) - return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored, + return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph); *ignored = 0; @@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, /* * Non-persistent service */ - if (!svc->fwmark && pptr[1] != svc->port) { + if (!svc->fwmark && vport != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " @@ -495,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, - &iph->saddr, pptr[0], &iph->daddr, - pptr[1], &p); + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, + caddr, cport, vaddr, vport, &p); cp = ip_vs_conn_new(&p, dest->af, &dest->addr, - dest->port ? dest->port : pptr[1], + dest->port ? dest->port : vport, flags, dest, skb->mark); if (!cp) { *ignored = -1; @@ -519,6 +547,15 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, return cp; } +static inline int ip_vs_addr_is_unicast(struct net *net, int af, + union nf_inet_addr *addr) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST; +#endif + return (inet_addr_type(net, addr->ip) == RTN_UNICAST); +} /* * Pass or drop the packet. 
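The cport/caddr/vport/vaddr selection added to ip_vs_schedule() above is the heart of the new inverse-header handling: when the header was filled from an ICMP-embedded packet, source and destination arrive swapped, so the client and virtual endpoints are chosen by flag instead of being re-parsed. A small sketch of that selection, with illustrative names and IPv4 only for brevity:

#include <stdbool.h>
#include <stdint.h>

struct tuple {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	bool inverse;             /* stands in for IP_VS_HDR_INVERSE */
};

static void pick_client_virtual(const struct tuple *t,
				uint32_t *caddr, uint16_t *cport,
				uint32_t *vaddr, uint16_t *vport)
{
	if (!t->inverse) {
		*caddr = t->saddr; *cport = t->sport;
		*vaddr = t->daddr; *vport = t->dport;
	} else {                  /* embedded header: roles swap */
		*caddr = t->daddr; *cport = t->dport;
		*vaddr = t->saddr; *vport = t->sport;
	}
}

Everything downstream, persistence lookup, scheduling and connection creation, then operates on the client/virtual pair without caring which direction the header was parsed in. Note also how the existing-connection check above briefly XORs IP_VS_HDR_INVERSE into iph->hdr_flags to probe the reverse direction, then restores it.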
@@ -528,33 +565,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph) { - __be16 _ports[2], *pptr; -#ifdef CONFIG_SYSCTL - struct net *net; - struct netns_ipvs *ipvs; - int unicast; -#endif + __be16 _ports[2], *pptr, dport; + struct netns_ipvs *ipvs = svc->ipvs; + struct net *net = ipvs->net; pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph); - if (pptr == NULL) { + if (!pptr) return NF_DROP; - } - -#ifdef CONFIG_SYSCTL - net = skb_net(skb); - -#ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) - unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST; - else -#endif - unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST); + dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0]; /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - ipvs = net_ipvs(net); - if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { + if (sysctl_cache_bypass(ipvs) && svc->fwmark && + !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) && + ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -566,7 +591,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, + ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, &iph->saddr, pptr[0], &iph->daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, svc->af, &daddr, 0, @@ -590,7 +615,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_conn_put(cp); return ret; } -#endif /* * When the virtual ftp service is presented, packets destined @@ -598,9 +622,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * listed in the ipvs table), pass the packets, because it is * not ipvs job to decide to drop the packets. */ - if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) + if (svc->port == FTPPORT && dport != FTPPORT) return NF_ACCEPT; + if (unlikely(ip_vs_iph_icmp(iph))) + return NF_DROP; + /* * Notify the client that the destination is unreachable, and * release the socket buffer. 
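The ip_vs_leave() rewrite above collapses the old CONFIG_SYSCTL maze into one gate: a cache_bypass entry is created only for fwmark services, only when the sysctl is enabled, only for a plain request header (neither inverse nor ICMP-embedded), and only toward a non-local unicast destination. As a hedged stand-alone predicate, with flag values chosen purely for illustration:

#include <stdbool.h>

#define HDR_INVERSE 0x1          /* stands in for IP_VS_HDR_INVERSE */
#define HDR_ICMP    0x2          /* stands in for IP_VS_HDR_ICMP */

static bool want_cache_bypass(bool sysctl_on, unsigned int svc_fwmark,
			      unsigned int hdr_flags, bool dst_is_unicast)
{
	return sysctl_on && svc_fwmark &&
	       !(hdr_flags & (HDR_INVERSE | HDR_ICMP)) &&
	       dst_is_unicast;
}

The dport used for the FTP check is likewise picked by direction, pptr[1] normally and pptr[0] for inverse headers, and ICMP-embedded packets that reach the end of the function are now dropped explicitly rather than answered with a port-unreachable.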
@@ -610,11 +637,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) { - if (!skb->dev) { - struct net *net_ = dev_net(skb_dst(skb)->dev); - - skb->dev = net_->loopback_dev; - } + if (!skb->dev) + skb->dev = net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); } else #endif @@ -625,15 +649,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, #ifdef CONFIG_SYSCTL -static int sysctl_snat_reroute(struct sk_buff *skb) +static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); return ipvs->sysctl_snat_reroute; } -static int sysctl_nat_icmp_send(struct net *net) +static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); return ipvs->sysctl_nat_icmp_send; } @@ -644,8 +666,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) #else -static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; } -static int sysctl_nat_icmp_send(struct net *net) { return 0; } +static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; } +static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; } static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; } #endif @@ -664,12 +686,13 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum) return IP_DEFRAG_VS_OUT; } -static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs, + struct sk_buff *skb, u_int32_t user) { int err; local_bh_disable(); - err = ip_defrag(skb, user); + err = ip_defrag(ipvs->net, skb, user); local_bh_enable(); if (!err) ip_send_check(ip_hdr(skb)); @@ -677,10 +700,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb, - unsigned int hooknum) +static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af, + struct sk_buff *skb, unsigned int hooknum) { - if (!sysctl_snat_reroute(skb)) + if (!sysctl_snat_reroute(ipvs)) return 0; /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ if (NF_INET_LOCAL_IN == hooknum) @@ -690,12 +713,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && - ip6_route_me_harder(skb) != 0) + ip6_route_me_harder(ipvs->net, skb) != 0) return 1; } else #endif if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && - ip_route_me_harder(skb, RTN_LOCAL) != 0) + ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0) return 1; return 0; @@ -848,7 +871,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb, hooknum)) + if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -872,8 +895,8 @@ out: * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. 
*/ -static int ip_vs_out_icmp(struct sk_buff *skb, int *related, - unsigned int hooknum) +static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum) { struct iphdr *iph; struct icmphdr _icmph, *ic; @@ -888,7 +911,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { - if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -934,10 +957,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, "Checking outgoing ICMP for"); - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph); + /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, &ciph, 1); + cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph); if (!cp) return NF_ACCEPT; @@ -947,16 +970,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, } #ifdef CONFIG_IP_VS_IPV6 -static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum, struct ip_vs_iphdr *ipvsh) +static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum, + struct ip_vs_iphdr *ipvsh) { struct icmp6hdr _icmph, *ic; - struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */ struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; union nf_inet_addr snet; - unsigned int writable; + unsigned int offset; *related = 1; ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh); @@ -984,31 +1007,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &ipvsh->saddr, &ipvsh->daddr); - /* Now find the contained IP header */ - ciph.len = ipvsh->len + sizeof(_icmph); - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph), + true, &ciph)) return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ pp = ip_vs_proto_get(ciph.protocol); if (!pp) return NF_ACCEPT; /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1); + cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph); if (!cp) return NF_ACCEPT; snet.in6 = ciph.saddr.in6; - writable = ciph.len; + offset = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr), + pp, offset, sizeof(struct ipv6hdr), hooknum); } #endif @@ -1093,7 +1108,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct ip_vs_protocol *pp = pd->pp; - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); + IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet"); if (!skb_make_writable(skb, iph->len)) goto drop; @@ -1127,10 +1142,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. 
*/ - if (ip_vs_route_me_harder(af, skb, hooknum)) + if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum)) goto drop; - IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); + IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); @@ -1155,9 +1170,8 @@ drop: * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; @@ -1182,16 +1196,15 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!skb_dst(skb))) return NF_ACCEPT; - net = skb_net(skb); - if (!net_ipvs(net)->enable) + if (!ipvs->enable) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_out_icmp_v6(skb, &related, + int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related, hooknum, &iph); if (related) @@ -1201,13 +1214,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; - int verdict = ip_vs_out_icmp(skb, &related, hooknum); + int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum); if (related) return verdict; } - pd = ip_vs_proto_data_get(net, iph.protocol); + pd = ip_vs_proto_data_get(ipvs, iph.protocol); if (unlikely(!pd)) return NF_ACCEPT; pp = pd->pp; @@ -1217,21 +1230,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (af == AF_INET) #endif if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) { - if (ip_vs_gather_frags(skb, + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; - ip_vs_fill_ip4hdr(skb_network_header(skb), &iph); + ip_vs_fill_iph_skb(AF_INET, skb, false, &iph); } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, &iph, 0); + cp = pp->conn_out_get(ipvs, af, skb, &iph); if (likely(cp)) return handle_response(af, skb, pd, cp, &iph, hooknum); - if (sysctl_nat_icmp_send(net) && + if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1241,7 +1254,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports, &iph); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, + if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, pptr[0])) { /* * Notify the real server: there is no @@ -1258,7 +1271,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { if (!skb->dev) - skb->dev = net->loopback_dev; + skb->dev = ipvs->net->loopback_dev; icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, @@ -1272,7 +1285,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) } } } - IP_VS_DBG_PKT(12, af, pp, skb, 0, + IP_VS_DBG_PKT(12, af, pp, skb, iph.off, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; } @@ -1283,10 +1296,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if packet is reply for established ip_vs_conn. 
*/ static unsigned int -ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); } /* @@ -1294,10 +1307,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1308,10 +1321,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); } /* @@ -1319,14 +1332,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Check if packet is reply for established ip_vs_conn. */ static unsigned int -ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_reply6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_out(ops->hooknum, skb, AF_INET6); + return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6); } #endif +static unsigned int +ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, + int *verdict, struct ip_vs_conn **cpp, + struct ip_vs_iphdr *iph) +{ + struct ip_vs_protocol *pp = pd->pp; + + if (!iph->fragoffs) { + /* No (second) fragments need to enter here, as nf_defrag_ipv6 + * replayed fragment zero will already have created the cp + */ + + /* Schedule and create new connection entry into cpp */ + if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph)) + return 0; + } + + if (unlikely(!*cpp)) { + /* sorry, all this trouble for a no-hit :) */ + IP_VS_DBG_PKT(12, af, pp, skb, iph->off, + "ip_vs_in: packet continues traversal as normal"); + if (iph->fragoffs) { + /* Fragment that couldn't be mapped to a conn entry + * is missing module nf_defrag_ipv6 + */ + IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); + IP_VS_DBG_PKT(7, af, pp, skb, iph->off, + "unhandled fragment"); + } + *verdict = NF_ACCEPT; + return 0; + } + + return 1; +} + /* * Handle ICMP messages in the outside-to-inside direction (incoming). * Find any that might be relevant, check against existing connections, @@ -1334,9 +1384,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb, * Currently handles error types - unreachable, quench, ttl exceeded. 
*/ static int -ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, + unsigned int hooknum) { - struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -1345,13 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, offset2, ihl, verdict; - bool ipip; + bool ipip, new_cp = false; *related = 1; /* reassemble IP fragments */ if (ip_is_fragment(ip_hdr(skb))) { - if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum))) + if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum))) return NF_STOLEN; } @@ -1385,8 +1435,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - net = skb_net(skb); - /* Special case for errors for IPIP packets */ ipip = false; if (cih->protocol == IPPROTO_IPIP) { @@ -1402,7 +1450,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ipip = true; } - pd = ip_vs_proto_data_get(net, cih->protocol); + pd = ip_vs_proto_data_get(ipvs, cih->protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; @@ -1416,15 +1464,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) "Checking incoming ICMP for"); offset2 = offset; - ip_vs_fill_ip4hdr(cih, &ciph); - ciph.len += offset; + ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph); offset = ciph.len; + /* The embedded headers contain source and dest in reverse order. * For IPIP this is error for request, not for reply. */ - cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1); - if (!cp) - return NF_ACCEPT; + cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph); + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(ipvs)) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph)) + return v; + new_cp = true; + } verdict = NF_DROP; @@ -1455,7 +1512,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) skb_reset_network_header(skb); IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n", &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu); - ipv4_update_pmtu(skb, dev_net(skb->dev), + ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0, 0, 0); /* Client uses PMTUD? 
*/ if (!(frag_off & htons(IP_DF))) @@ -1501,23 +1558,26 @@ ignore_ipip: verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph); out: - __ip_vs_conn_put(cp); + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } #ifdef CONFIG_IP_VS_IPV6 -static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, - unsigned int hooknum, struct ip_vs_iphdr *iph) +static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb, + int *related, unsigned int hooknum, + struct ip_vs_iphdr *iph) { - struct net *net = NULL; - struct ipv6hdr _ip6h, *ip6h; struct icmp6hdr _icmph, *ic; struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; - unsigned int offs_ciph, writable, verdict; + unsigned int offset, verdict; + bool new_cp = false; *related = 1; @@ -1546,21 +1606,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); - /* Now find the contained IP header */ - ciph.len = iph->len + sizeof(_icmph); - offs_ciph = ciph.len; /* Save ip header offset */ - ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h); - if (ip6h == NULL) - return NF_ACCEPT; /* The packet looks wrong, ignore */ - ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */ - ciph.daddr.in6 = ip6h->daddr; - /* skip possible IPv6 exthdrs of contained IPv6 packet */ - ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL); - if (ciph.protocol < 0) - return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */ - - net = skb_net(skb); - pd = ip_vs_proto_data_get(net, ciph.protocol); + offset = iph->len + sizeof(_icmph); + if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph)) + return NF_ACCEPT; + + pd = ip_vs_proto_data_get(ipvs, ciph.protocol); if (!pd) return NF_ACCEPT; pp = pd->pp; @@ -1569,36 +1619,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, if (ciph.fragoffs) return NF_ACCEPT; - IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph, + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, "Checking incoming ICMPv6 for"); /* The embedded headers contain source and dest in reverse order * if not from localhost */ - cp = pp->conn_in_get(AF_INET6, skb, &ciph, - (hooknum == NF_INET_LOCAL_OUT) ? 
0 : 1); + cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph); + + if (!cp) { + int v; + + if (!sysctl_schedule_icmp(ipvs)) + return NF_ACCEPT; + + if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph)) + return v; + + new_cp = true; + } - if (!cp) - return NF_ACCEPT; /* VS/TUN, VS/DR and LOCALNODE just let it go */ if ((hooknum == NF_INET_LOCAL_OUT) && (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) { - __ip_vs_conn_put(cp); - return NF_ACCEPT; + verdict = NF_ACCEPT; + goto out; } /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); /* Need to mangle contained IPv6 header in ICMPv6 packet */ - writable = ciph.len; + offset = ciph.len; if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol || IPPROTO_SCTP == ciph.protocol) - writable += 2 * sizeof(__u16); /* Also mangle ports */ + offset += 2 * sizeof(__u16); /* Also mangle ports */ - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph); + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph); - __ip_vs_conn_put(cp); +out: + if (likely(!new_cp)) + __ip_vs_conn_put(cp); + else + ip_vs_conn_put(cp); return verdict; } @@ -1610,15 +1673,13 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, * and send it on its way... */ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) +ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af) { - struct net *net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; - struct netns_ipvs *ipvs; int conn_reuse_mode; /* Already marked as IPVS request or reply? */ @@ -1633,7 +1694,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely((skb->pkt_type != PACKET_HOST && hooknum != NF_INET_LOCAL_OUT) || !skb_dst(skb))) { - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s" " ignored in hook %u\n", skb->pkt_type, iph.protocol, @@ -1641,12 +1702,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; } /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - ip_vs_fill_iph_skb(af, skb, &iph); + ip_vs_fill_iph_skb(af, skb, false, &iph); /* Bad... Do not break raw sockets */ if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && @@ -1662,8 +1721,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (af == AF_INET6) { if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; - int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, - &iph); + int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related, + hooknum, &iph); if (related) return verdict; @@ -1672,21 +1731,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #endif if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related; - int verdict = ip_vs_in_icmp(skb, &related, hooknum); + int verdict = ip_vs_in_icmp(ipvs, skb, &related, + hooknum); if (related) return verdict; } /* Protocol supported? 
*/ - pd = ip_vs_proto_data_get(net, iph.protocol); - if (unlikely(!pd)) + pd = ip_vs_proto_data_get(ipvs, iph.protocol); + if (unlikely(!pd)) { + /* The only way we'll see this packet again is if it's + * encapsulated, so mark it with ipvs_property=1 so we + * skip it if we're ignoring tunneled packets + */ + if (sysctl_ignore_tunneled(ipvs)) + skb->ipvs_property = 1; + return NF_ACCEPT; + } pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, &iph, 0); + cp = pp->conn_in_get(ipvs, af, skb, &iph); conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); if (conn_reuse_mode && !iph.fragoffs && @@ -1700,32 +1768,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) cp = NULL; } - if (unlikely(!cp) && !iph.fragoffs) { - /* No (second) fragments need to enter here, as nf_defrag_ipv6 - * replayed fragment zero will already have created the cp - */ + if (unlikely(!cp)) { int v; - /* Schedule and create new connection entry into &cp */ - if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph)) + if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph)) return v; } - if (unlikely(!cp)) { - /* sorry, all this trouble for a no-hit :) */ - IP_VS_DBG_PKT(12, af, pp, skb, 0, - "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs) { - /* Fragment that couldn't be mapped to a conn entry - * is missing module nf_defrag_ipv6 - */ - IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); - IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment"); - } - return NF_ACCEPT; - } + IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet"); - IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1765,7 +1816,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) pkts = atomic_add_return(1, &cp->in_pkts); if (ipvs->sync_state & IP_VS_STATE_MASTER) - ip_vs_sync_conn(net, cp, pkts); + ip_vs_sync_conn(ipvs, cp, pkts); ip_vs_conn_put(cp); return ret; @@ -1776,10 +1827,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); } /* @@ -1787,10 +1838,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET); } #ifdef CONFIG_IP_VS_IPV6 @@ -1800,10 +1851,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb, * Schedule and forward packets from remote clients */ static unsigned int -ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_remote_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); } /* @@ -1811,10 +1862,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct 
sk_buff *skb, * Schedule and forward packets from local clients */ static unsigned int -ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_local_request6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - return ip_vs_in(ops->hooknum, skb, AF_INET6); + return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6); } #endif @@ -1830,46 +1881,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; - struct net *net; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = net_ipvs(state->net); if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, ops->hooknum); + return ip_vs_in_icmp(ipvs, skb, &r, state->hook); } #ifdef CONFIG_IP_VS_IPV6 static unsigned int -ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb, +ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { int r; - struct net *net; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = net_ipvs(state->net); struct ip_vs_iphdr iphdr; - ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr); + ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr); if (iphdr.protocol != IPPROTO_ICMPV6) return NF_ACCEPT; /* ipvs enabled in this netns ? */ - net = skb_net(skb); - ipvs = net_ipvs(net); if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) return NF_ACCEPT; - return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr); + return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr); } #endif @@ -1878,7 +1923,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 2, @@ -1888,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * applied to IPVS. 
*/ { .hook = ip_vs_remote_request4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC - 1, @@ -1896,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_local_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 1, @@ -1904,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After mangle, schedule and forward local requests */ { .hook = ip_vs_local_request4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP_PRI_NAT_DST + 2, @@ -1913,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 99, @@ -1921,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, - .owner = THIS_MODULE, .pf = NFPROTO_IPV4, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -1930,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 2, @@ -1940,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * applied to IPVS. */ { .hook = ip_vs_remote_request6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_NAT_SRC - 1, @@ -1948,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* Before ip_vs_in, change source only for VS/NAT */ { .hook = ip_vs_local_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 1, @@ -1956,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After mangle, schedule and forward local requests */ { .hook = ip_vs_local_request6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_NAT_DST + 2, @@ -1965,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { * destined for 0.0.0.0/0, which is for incoming IPVS connections */ { .hook = ip_vs_forward_icmp_v6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 99, @@ -1973,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, - .owner = THIS_MODULE, .pf = NFPROTO_IPV6, .hooknum = NF_INET_FORWARD, .priority = 100, @@ -1999,22 +2032,22 @@ static int __net_init __ip_vs_init(struct net *net) atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; - if (ip_vs_estimator_net_init(net) < 0) + if (ip_vs_estimator_net_init(ipvs) < 0) goto estimator_fail; - if (ip_vs_control_net_init(net) < 0) + if (ip_vs_control_net_init(ipvs) < 0) goto control_fail; - if (ip_vs_protocol_net_init(net) < 0) + if (ip_vs_protocol_net_init(ipvs) < 0) goto protocol_fail; - if (ip_vs_app_net_init(net) < 0) + if (ip_vs_app_net_init(ipvs) < 0) goto app_fail; - if (ip_vs_conn_net_init(net) < 0) + if (ip_vs_conn_net_init(ipvs) < 0) goto conn_fail; - if (ip_vs_sync_net_init(net) < 0) + if (ip_vs_sync_net_init(ipvs) < 0) goto sync_fail; printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", @@ -2025,15 +2058,15 @@ 
static int __net_init __ip_vs_init(struct net *net) */ sync_fail: - ip_vs_conn_net_cleanup(net); + ip_vs_conn_net_cleanup(ipvs); conn_fail: - ip_vs_app_net_cleanup(net); + ip_vs_app_net_cleanup(ipvs); app_fail: - ip_vs_protocol_net_cleanup(net); + ip_vs_protocol_net_cleanup(ipvs); protocol_fail: - ip_vs_control_net_cleanup(net); + ip_vs_control_net_cleanup(ipvs); control_fail: - ip_vs_estimator_net_cleanup(net); + ip_vs_estimator_net_cleanup(ipvs); estimator_fail: net->ipvs = NULL; return -ENOMEM; @@ -2041,22 +2074,25 @@ estimator_fail: static void __net_exit __ip_vs_cleanup(struct net *net) { - ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */ - ip_vs_conn_net_cleanup(net); - ip_vs_app_net_cleanup(net); - ip_vs_protocol_net_cleanup(net); - ip_vs_control_net_cleanup(net); - ip_vs_estimator_net_cleanup(net); - IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ + ip_vs_conn_net_cleanup(ipvs); + ip_vs_app_net_cleanup(ipvs); + ip_vs_protocol_net_cleanup(ipvs); + ip_vs_control_net_cleanup(ipvs); + ip_vs_estimator_net_cleanup(ipvs); + IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen); net->ipvs = NULL; } static void __net_exit __ip_vs_dev_cleanup(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); EnterFunction(2); - net_ipvs(net)->enable = 0; /* Disable packet reception */ + ipvs->enable = 0; /* Disable packet reception */ smp_wmb(); - ip_vs_sync_net_cleanup(net); + ip_vs_sync_net_cleanup(ipvs); LeaveFunction(2); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1a23e91d50d8..e7c1b052c2a3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work) update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) - ip_vs_random_dropentry(ipvs->net); + ip_vs_random_dropentry(ipvs); schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } #endif @@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; * Returns hash value for virtual service */ static inline unsigned int -ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, +ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { register unsigned int porth = ntohs(port); @@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, addr->ip6[2]^addr->ip6[3]; #endif ahash = ntohl(addr_fold); - ahash ^= ((size_t) net >> 8); + ahash ^= ((size_t) ipvs >> 8); return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & IP_VS_SVC_TAB_MASK; @@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto, /* * Returns hash value of fwmark for virtual service lookup */ -static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) +static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) { - return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; + return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* @@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, &svc->addr, svc->port); hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* * Hash it by fwmark 
in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {netns, proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_find(struct net *net, int af, __u16 protocol, +__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { unsigned int hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) && (svc->protocol == protocol) - && net_eq(svc->net, net)) { + && (svc->ipvs == ipvs)) { /* HIT */ return svc; } @@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol, * Get service by {fwmark} in the service table. */ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) { unsigned int hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(net, fwmark); + hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { if (svc->fwmark == fwmark && svc->af == af - && net_eq(svc->net, net)) { + && (svc->ipvs == ipvs)) { /* HIT */ return svc; } @@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) /* Find service, called under RCU lock */ struct ip_vs_service * -ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, +ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; - struct netns_ipvs *ipvs = net_ipvs(net); /* * Check the table hashed by fwmark first */ if (fwmark) { - svc = __ip_vs_svc_fwm_find(net, af, fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); if (svc) goto out; } @@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, * Check the table hashed by <protocol,addr,port> * for "full" addressed entries */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. 
*/ - svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); + svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); } out: @@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest) } /* Check if real service by <proto,addr,port> is present */ -bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, +bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { - struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash; struct ip_vs_dest *dest; @@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, * on the backup. * Called under RCU lock, no refcnt is returned. */ -struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, +struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, @@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af, struct ip_vs_service *svc; __be16 port = dport; - svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest; - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; /* * Find the destination in trash @@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) * are expired, and the refcnt of each destination in the trash must * be 0, so we simply release them here. 
*/ -static void ip_vs_trash_cleanup(struct net *net) +static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) { struct ip_vs_dest *dest, *nxt; - struct netns_ipvs *ipvs = net_ipvs(net); del_timer_sync(&ipvs->dest_trash_timer); /* No need to use dest_trash_lock */ @@ -788,7 +785,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_service *old_svc; struct ip_vs_scheduler *sched; int conn_flags; @@ -843,7 +840,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock_bh(&dest->dst_lock); if (add) { - ip_vs_start_estimator(svc->net, &dest->stats); + ip_vs_start_estimator(svc->ipvs, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; sched = rcu_dereference_protected(svc->scheduler, 1); @@ -874,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, atype = ipv6_addr_type(&udest->addr.in6); if ((!(atype & IPV6_ADDR_UNICAST) || atype & IPV6_ADDR_LINKLOCAL) && - !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) + !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) return -EINVAL; } else #endif { - atype = inet_addr_type(svc->net, udest->addr.ip); + atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); if (atype != RTN_LOCAL && atype != RTN_UNICAST) return -EINVAL; } @@ -1036,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, +static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, bool cleanup) { - struct netns_ipvs *ipvs = net_ipvs(net); - - ip_vs_stop_estimator(net, &dest->stats); + ip_vs_stop_estimator(ipvs, &dest->stats); /* * Remove it from the d-linked list with the real services. 
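
The ip_vs_ctl.c hunks above follow one mechanical conversion: functions that used to take struct net * and immediately derive the per-netns IPVS state now take struct netns_ipvs * directly, while the few spots that still need the wider namespace reach it through the ipvs->net back-pointer. A minimal sketch of both directions of that mapping, assuming only the helpers the patch itself relies on (the example_* wrappers are hypothetical, added for illustration):

#include <net/ip_vs.h>
#include <net/route.h>

/* Old-style boundary: a caller that still holds only struct net. */
static void example_stop_dest(struct net *net, struct ip_vs_dest *dest)
{
	struct netns_ipvs *ipvs = net_ipvs(net);	/* net -> ipvs */

	ip_vs_stop_estimator(ipvs, &dest->stats);	/* new signature */
}

/* New-style code that still needs struct net, e.g. for a route lookup. */
static bool example_dest_is_local(struct netns_ipvs *ipvs, __be32 addr)
{
	return inet_addr_type(ipvs->net, addr) == RTN_LOCAL; /* ipvs -> net */
}

The same shift explains the hash helpers earlier in the file: ip_vs_svc_hashkey() and ip_vs_svc_fwm_hashkey() now mix in ((size_t)ipvs >> 8) instead of ((size_t)net >> 8), an equally stable per-namespace discriminator now that callers carry the ipvs pointer.
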
@@ -1079,7 +1074,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, svc->num_dests--; if (dest->af != svc->af) - net_ipvs(svc->net)->mixed_address_family_dests--; + svc->ipvs->mixed_address_family_dests--; if (svcupd) { struct ip_vs_scheduler *sched; @@ -1120,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(svc->net, dest, false); + __ip_vs_del_dest(svc->ipvs, dest, false); LeaveFunction(2); @@ -1129,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) static void ip_vs_dest_trash_expire(unsigned long data) { - struct net *net = (struct net *) data; - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = (struct netns_ipvs *)data; struct ip_vs_dest *dest, *next; unsigned long now = jiffies; @@ -1163,14 +1157,13 @@ static void ip_vs_dest_trash_expire(unsigned long data) * Add a service into the service hash table */ static int -ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0, i; struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; - struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1237,7 +1230,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; - svc->net = net; + svc->ipvs = ipvs; INIT_LIST_HEAD(&svc->destinations); spin_lock_init(&svc->sched_lock); @@ -1261,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ipvs->nullsvc_counter); - ip_vs_start_estimator(net, &svc->stats); + ip_vs_start_estimator(ipvs, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1381,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; - struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct netns_ipvs *ipvs = svc->ipvs; pr_info("%s: enter\n", __func__); @@ -1389,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) if (svc->af == AF_INET) ipvs->num_services--; - ip_vs_stop_estimator(svc->net, &svc->stats); + ip_vs_stop_estimator(svc->ipvs, &svc->stats); /* Unbind scheduler */ old_sched = rcu_dereference_protected(svc->scheduler, 1); @@ -1405,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(svc->net, dest, cleanup); + __ip_vs_del_dest(svc->ipvs, dest, cleanup); } /* @@ -1456,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(struct net *net, bool cleanup) +static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) { int idx; struct ip_vs_service *svc; @@ -1468,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_unlink_service(svc, cleanup); } } @@ -1479,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool 
cleanup) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_unlink_service(svc, cleanup); } } @@ -1491,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup) * Delete service by {netns} in the service table. * Called by __ip_vs_cleanup() */ -void ip_vs_service_net_cleanup(struct net *net) +void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs) { EnterFunction(2); /* Check for "full" addressed entries */ mutex_lock(&__ip_vs_mutex); - ip_vs_flush(net, true); + ip_vs_flush(ipvs, true); mutex_unlock(&__ip_vs_mutex); LeaveFunction(2); } @@ -1540,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, mutex_lock(&__ip_vs_mutex); for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) { + if (svc->ipvs == ipvs) { list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_forget_dev(dest, dev); @@ -1549,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, } hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) { + if (svc->ipvs == ipvs) { list_for_each_entry(dest, &svc->destinations, n_list) { ip_vs_forget_dev(dest, dev); @@ -1583,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(struct net *net) +static int ip_vs_zero_all(struct netns_ipvs *ipvs) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_zero_service(svc); } } for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net)) + if (svc->ipvs == ipvs) ip_vs_zero_service(svc); } } - ip_vs_zero_stats(&net_ipvs(net)->tot_stats); + ip_vs_zero_stats(&ipvs->tot_stats); return 0; } @@ -1615,7 +1608,7 @@ static int proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; + struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; int val = *valp; int rc; @@ -1626,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(net_ipvs(net)); + update_defense_level(ipvs); } } return rc; @@ -1844,6 +1837,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "schedule_icmp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ignore_tunneled", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -1889,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags) static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1896,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { - if (net_eq(svc->net, 
net) && pos-- == 0) { + if ((svc->ipvs == ipvs) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1908,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (net_eq(svc->net, net) && pos-- == 0) { + if ((svc->ipvs == ipvs) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -2196,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = { /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) { #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; @@ -2209,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } @@ -2223,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } @@ -2344,12 +2350,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) cfg.syncid = dm->syncid; rtnl_lock(); mutex_lock(&ipvs->sync_mutex); - ret = start_sync_thread(net, &cfg, dm->state); + ret = start_sync_thread(ipvs, &cfg, dm->state); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); } else { mutex_lock(&ipvs->sync_mutex); - ret = stop_sync_thread(net, dm->state); + ret = stop_sync_thread(ipvs, dm->state); mutex_unlock(&ipvs->sync_mutex); } goto out_dec; @@ -2358,11 +2364,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) mutex_lock(&__ip_vs_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(net, false); + ret = ip_vs_flush(ipvs, false); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); goto out_unlock; } @@ -2377,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(net); + ret = ip_vs_zero_all(ipvs); goto out_unlock; } } @@ -2395,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by <protocol, addr, port> or fwmark */ rcu_read_lock(); if (usvc.fwmark == 0) - svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, + svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); 
rcu_read_unlock(); if (cmd != IP_VS_SO_SET_ADD @@ -2412,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(net, &usvc, &svc); + ret = ip_vs_add_service(ipvs, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2471,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(struct net *net, +__ip_vs_get_service_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { @@ -2483,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || !net_eq(svc->net, net)) + if (svc->af != AF_INET || (svc->ipvs != ipvs)) continue; if (count >= get->num_services) @@ -2502,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || !net_eq(svc->net, net)) + if (svc->af != AF_INET || (svc->ipvs != ipvs)) continue; if (count >= get->num_services) @@ -2522,7 +2528,7 @@ out: } static inline int -__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2531,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, rcu_read_lock(); if (get->fwmark) - svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, get->port); rcu_read_unlock(); @@ -2578,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) { #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) struct ip_vs_proto_data *pd; @@ -2587,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) memset(u, 0, sizeof (*u)); #ifdef CONFIG_IP_VS_PROTO_TCP - pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP - pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); u->udp_timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif @@ -2711,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_service_entries(net, get, user); + ret = __ip_vs_get_service_entries(ipvs, get, user); } break; @@ -2725,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) addr.ip = entry->addr; rcu_read_lock(); if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); else - svc = 
__ip_vs_service_find(net, AF_INET, + svc = __ip_vs_service_find(ipvs, AF_INET, entry->protocol, &addr, entry->port); rcu_read_unlock(); @@ -2753,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(net, get, user); + ret = __ip_vs_get_dest_entries(ipvs, get, user); } break; @@ -2761,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -2996,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start || !net_eq(svc->net, net)) + if (++idx <= start || (svc->ipvs != ipvs)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3012,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start || !net_eq(svc->net, net)) + if (++idx <= start || (svc->ipvs != ipvs)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3028,7 +3035,7 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct net *net, +static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) @@ -3073,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net, rcu_read_lock(); if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, + svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, &usvc->addr, usvc->port); rcu_read_unlock(); *ret_svc = svc; @@ -3113,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, +static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc); return ret ? 
ERR_PTR(ret) : svc; } @@ -3195,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&__ip_vs_mutex); @@ -3205,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, goto out_err; - svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3341,7 +3349,7 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = skb_sknet(skb); + struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&ipvs->sync_mutex); @@ -3367,9 +3375,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ipvs_sync_daemon_cfg c; struct nlattr *a; int ret; @@ -3426,33 +3433,32 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) rtnl_lock(); mutex_lock(&ipvs->sync_mutex); - ret = start_sync_thread(net, &c, + ret = start_sync_thread(ipvs, &c, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); return ret; } -static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) { - struct netns_ipvs *ipvs = net_ipvs(net); int ret; if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; mutex_lock(&ipvs->sync_mutex); - ret = stop_sync_thread(net, + ret = stop_sync_thread(ipvs, nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); mutex_unlock(&ipvs->sync_mutex); return ret; } -static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) +static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3464,17 +3470,15 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(net, &t); + return ip_vs_set_timeout(ipvs, &t); } static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) { int ret = -EINVAL, cmd; - struct net *net; - struct netns_ipvs *ipvs; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); - ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3487,9 +3491,9 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) goto out; if (cmd == IPVS_CMD_NEW_DAEMON) - ret = ip_vs_genl_new_daemon(net, daemon_attrs); + ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); else - ret = ip_vs_genl_del_daemon(net, daemon_attrs); + ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); } out: @@ -3503,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; - struct net *net; + struct net 
*net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(net, false); + ret = ip_vs_flush(ipvs, false); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(net, info->attrs); + ret = ip_vs_genl_set_config(ipvs, info->attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(net); + ret = ip_vs_zero_all(ipvs); goto out; } @@ -3528,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(net, &usvc, + ret = ip_vs_genl_parse_service(ipvs, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3567,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) /* The synchronization protocol is incompatible * with mixed family services */ - if (net_ipvs(net)->sync_state) { + if (ipvs->sync_state) { ret = -EINVAL; goto out; } @@ -3587,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(net, &usvc, &svc); + ret = ip_vs_add_service(ipvs, &usvc, &svc); else ret = -EEXIST; break; @@ -3625,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; - struct net *net; + struct net *net = sock_net(skb->sk); + struct netns_ipvs *ipvs = net_ipvs(net); - net = skb_sknet(skb); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3656,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(net, + svc = ip_vs_genl_find_service(ipvs, info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); @@ -3677,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(net, &t); + __ip_vs_get_timeouts(ipvs, &t); #ifdef CONFIG_IP_VS_PROTO_TCP if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout) || @@ -3832,10 +3836,10 @@ static void ip_vs_genl_unregister(void) * per netns intit/exit func. 
*/ #ifdef CONFIG_SYSCTL -static int __net_init ip_vs_control_net_init_sysctl(struct net *net) +static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; int idx; - struct netns_ipvs *ipvs = net_ipvs(net); struct ctl_table *tbl; atomic_set(&ipvs->dropentry, 0); @@ -3854,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) } else tbl = vs_vars; /* Initialize sysctl defaults */ + for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) { + if (tbl[idx].proc_handler == proc_do_defense_mode) + tbl[idx].extra2 = ipvs; + } idx = 0; ipvs->sysctl_amemthresh = 1024; tbl[idx++].data = &ipvs->sysctl_amemthresh; @@ -3895,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_backup_only; ipvs->sysctl_conn_reuse_mode = 1; tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; - + tbl[idx++].data = &ipvs->sysctl_schedule_icmp; + tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { @@ -3903,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) kfree(tbl); return -ENOMEM; } - ip_vs_start_estimator(net, &ipvs->tot_stats); + ip_vs_start_estimator(ipvs, &ipvs->tot_stats); ipvs->sysctl_tbl = tbl; /* Schedule defense work */ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); @@ -3912,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) return 0; } -static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; cancel_delayed_work_sync(&ipvs->defense_work); cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); - ip_vs_stop_estimator(net, &ipvs->tot_stats); + ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); if (!net_eq(net, &init_net)) kfree(ipvs->sysctl_tbl); @@ -3927,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) #else -static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; } -static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { } +static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } +static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } #endif @@ -3936,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = { .notifier_call = ip_vs_dst_event, }; -int __net_init ip_vs_control_net_init(struct net *net) +int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) { + struct net *net = ipvs->net; int i, idx; - struct netns_ipvs *ipvs = net_ipvs(net); /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) @@ -3948,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net) INIT_LIST_HEAD(&ipvs->dest_trash); spin_lock_init(&ipvs->dest_trash_lock); setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, - (unsigned long) net); + (unsigned long) ipvs); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); @@ -3970,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net) proc_create("ip_vs_stats_percpu", 0, net->proc_net, &ip_vs_stats_percpu_fops); - if (ip_vs_control_net_init_sysctl(net)) + if (ip_vs_control_net_init_sysctl(ipvs)) goto err; return 0; @@ -3980,12 +3989,12 @@ err: return -ENOMEM; } -void 
__net_exit ip_vs_control_net_cleanup(struct net *net) +void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); + struct net *net = ipvs->net; - ip_vs_trash_cleanup(net); - ip_vs_control_net_cleanup_sysctl(net); + ip_vs_trash_cleanup(ipvs); + ip_vs_control_net_cleanup_sysctl(ipvs); remove_proc_entry("ip_vs_stats_percpu", net->proc_net); remove_proc_entry("ip_vs_stats", net->proc_net); remove_proc_entry("ip_vs", net->proc_net); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ef0eb0a8d552..457c6c193e13 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg) struct ip_vs_estimator *e; struct ip_vs_stats *s; u64 rate; - struct net *net = (struct net *)arg; - struct netns_ipvs *ipvs; + struct netns_ipvs *ipvs = (struct netns_ipvs *)arg; - ipvs = net_ipvs(net); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); @@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg) mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); @@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats) spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats) +void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; spin_lock_bh(&ipvs->est_lock); @@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats) dst->outbps = (e->outbps + 0xF) >> 5; } -int __net_init ip_vs_estimator_net_init(struct net *net) +int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - INIT_LIST_HEAD(&ipvs->est_list); spin_lock_init(&ipvs->est_lock); - setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs); mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); return 0; } -void __net_exit ip_vs_estimator_net_cleanup(struct net *net) +void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs) { - del_timer_sync(&net_ipvs(net)->est_timer); + del_timer_sync(&ipvs->est_timer); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 5d3daae98bf0..d30c327bb578 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; - struct net *net; *diff = 0; @@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + ip_vs_conn_fill_param(cp->ipvs, AF_INET, iph->protocol, &from, port, &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), + ip_vs_conn_fill_param(cp->ipvs, AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); /* As above, this is ipv4 only */ @@ -289,9 +288,8 
@@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. */ - net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(net, n_cp); + ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; - struct net *net; /* no diff required for incoming packets */ *diff = 0; @@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + ip_vs_conn_fill_param(cp->ipvs, AF_INET, iph->protocol, &to, port, &cp->vaddr, htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); @@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - net = skb_net(skb); - ip_vs_tcp_conn_listen(net, n_cp); + ip_vs_tcp_conn_listen(n_cp); ip_vs_conn_put(n_cp); return 1; @@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!ipvs) return -ENOENT; - app = register_ip_vs_app(net, &ip_vs_ftp); + app = register_ip_vs_app(ipvs, &ip_vs_ftp); if (IS_ERR(app)) return PTR_ERR(app); for (i = 0; i < ports_count; i++) { if (!ports[i]) continue; - ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); + ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]); if (ret) goto err_unreg; pr_info("%s: loaded support on port[%d] = %d\n", @@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) return 0; err_unreg: - unregister_ip_vs_app(net, &ip_vs_ftp); + unregister_ip_vs_app(ipvs, &ip_vs_ftp); return ret; } /* @@ -471,7 +467,12 @@ err_unreg: */ static void __ip_vs_ftp_exit(struct net *net) { - unregister_ip_vs_app(net, &ip_vs_ftp); + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!ipvs) + return; + + unregister_ip_vs_app(ipvs, &ip_vs_ftp); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 127f14046c51..cccf4d637412 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc) static int sysctl_lblc_expiration(struct ip_vs_service *svc) { #ifdef CONFIG_SYSCTL - struct netns_ipvs *ipvs = net_ipvs(svc->net); - return ipvs->sysctl_lblc_expiration; + return svc->ipvs->sysctl_lblc_expiration; #else return DEFAULT_EXPIRATION; #endif diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 2229d2d8bbe0..796d70e47ddd 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc) static int sysctl_lblcr_expiration(struct ip_vs_service *svc) { #ifdef CONFIG_SYSCTL - struct netns_ipvs *ipvs = net_ipvs(svc->net); - return ipvs->sysctl_lblcr_expiration; + return svc->ipvs->sysctl_lblcr_expiration; #else return DEFAULT_EXPIRATION; #endif diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 136184572fc9..30434fb133df 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net_ipvs(net), 
exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt, - &tuple); + h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index bed5f7042529..1b8d594e493a 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) const char *dptr; int retc; - ip_vs_fill_iph_skb(p->af, skb, &iph); + ip_vs_fill_iph_skb(p->af, skb, false, &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 939f7fbe9b46..8ae480715cea 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) * register an ipvs protocols netns related data */ static int -register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) +register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) { - struct netns_ipvs *ipvs = net_ipvs(net); unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); struct ip_vs_proto_data *pd = kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); @@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) atomic_set(&pd->appcnt, 0); /* Init app counter */ if (pp->init_netns != NULL) { - int ret = pp->init_netns(net, pd); + int ret = pp->init_netns(ipvs, pd); if (ret) { /* unlink an free proto data */ ipvs->proto_data_table[hash] = pd->next; @@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) * unregister an ipvs protocols netns data */ static int -unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) +unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data **pd_p; unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); @@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) if (*pd_p == pd) { *pd_p = pd->next; if (pd->pp->exit_netns != NULL) - pd->pp->exit_netns(net, pd); + pd->pp->exit_netns(ipvs, pd); kfree(pd); return 0; } @@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get); /* * get ip_vs_protocol object data by netns and proto */ -static struct ip_vs_proto_data * -__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { struct ip_vs_proto_data *pd; unsigned int hash = IP_VS_PROTO_HASH(proto); @@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) return NULL; } - -struct ip_vs_proto_data * -ip_vs_proto_data_get(struct net *net, unsigned short proto) -{ - struct netns_ipvs *ipvs = net_ipvs(net); - - return __ipvs_proto_data_get(ipvs, proto); -} EXPORT_SYMBOL(ip_vs_proto_data_get); /* @@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -int 
__net_init ip_vs_protocol_net_init(struct net *net) +int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) { int i, ret; static struct ip_vs_protocol *protos[] = { @@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net) }; for (i = 0; i < ARRAY_SIZE(protos); i++) { - ret = register_ip_vs_proto_netns(net, protos[i]); + ret = register_ip_vs_proto_netns(ipvs, protos[i]); if (ret < 0) goto cleanup; } return 0; cleanup: - ip_vs_protocol_net_cleanup(net); + ip_vs_protocol_net_cleanup(ipvs); return ret; } -void __net_exit ip_vs_protocol_net_cleanup(struct net *net) +void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; int i; /* unregister all the ipvs proto data for this netns */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pd = ipvs->proto_data_table[i]) != NULL) - unregister_ip_vs_proto_netns(net, pd); + unregister_ip_vs_proto_netns(ipvs, pd); } } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 5de3dd312c0f..5320d39976e1 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,30 +41,28 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(struct net *net, int af, - const struct ip_vs_iphdr *iph, int inverse, +ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af, + const struct ip_vs_iphdr *iph, struct ip_vs_conn_param *p) { - if (likely(!inverse)) - ip_vs_conn_fill_param(net, af, IPPROTO_UDP, + if (likely(!ip_vs_iph_inverse(iph))) + ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(net, af, IPPROTO_UDP, + ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, - int inverse) +ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; - struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(ipvs, af, iph, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, */ IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? "ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, static struct ip_vs_conn * -ah_esp_conn_out_get(int af, const struct sk_buff *skb, - const struct ip_vs_iphdr *iph, int inverse) +ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph) { struct ip_vs_conn *cp; struct ip_vs_conn_param p; - struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(ipvs, af, iph, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", - inverse ? "ICMP+" : "", + ip_vs_iph_icmp(iph) ? 
"ICMP+" : "", ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); @@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 5b84c0b56642..010ddeec135f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,35 +9,44 @@ #include <net/ip_vs.h> static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; - struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; - - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) { - *verdict = NF_DROP; - return 0; + __be16 _ports[2], *ports = NULL; + + if (likely(!ip_vs_iph_icmp(iph))) { + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + if (sh) { + sch = skb_header_pointer( + skb, iph->len + sizeof(sctp_sctphdr_t), + sizeof(_schunkh), &_schunkh); + if (sch && (sch->type == SCTP_CID_INIT || + sysctl_sloppy_sctp(ipvs))) + ports = &sh->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); } - sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), - sizeof(_schunkh), &_schunkh); - if (sch == NULL) { + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); - ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, sh->dest))) { + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { @@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct net *net, struct ip_vs_app *inc) +static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); hash = sctp_app_hashkey(port); @@ -498,9 +506,9 @@ out: return ret; } -static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) +static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static int sctp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int 
result = 0; @@ -549,10 +557,8 @@ out: * timeouts is netns related now. * --------------------------------------------- */ -static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, sizeof(sctp_timeouts)); @@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 8e92beb0cca9..d7024b2ed769 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -32,27 +32,47 @@ #include <net/ip_vs.h> static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; - struct netns_ipvs *ipvs; + __be16 _ports[2], *ports = NULL; - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (th == NULL) { + /* In the event of icmp, we're only guaranteed to have the first 8 + * bytes of the transport header, so we only check the rest of the + * TCP packet for non-ICMP packets + */ + if (likely(!ip_vs_iph_icmp(iph))) { + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + if (th) { + if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn)) + return 1; + ports = &th->source; + } + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); - ipvs = net_ipvs(net); + /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && - (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, th->dest))) { + + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + + if (svc) { int ignored; if (ip_vs_todrop(ipvs)) { @@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct net *net, struct ip_vs_app *inc) +static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); hash = tcp_app_hashkey(port); @@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc) static void -tcp_unregister_app(struct net *net, struct ip_vs_app *inc) +tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -609,7 
+628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc) static int tcp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP); spin_lock_bh(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; @@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) * timeouts is netns related now. * --------------------------------------------- */ -static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); @@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index b62a3c0ff9bf..e494e9a88c7f 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -29,28 +29,42 @@ #include <net/ip6_checksum.h> static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, +udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph) { - struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; + __be16 _ports[2], *ports = NULL; - /* IPv6 fragments, only first fragment will hit this */ - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (uh == NULL) { + if (likely(!ip_vs_iph_icmp(iph))) { + /* IPv6 fragments, only first fragment will hit this */ + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + if (uh) + ports = &uh->source; + } else { + ports = skb_header_pointer( + skb, iph->len, sizeof(_ports), &_ports); + } + + if (!ports) { *verdict = NF_DROP; return 0; } - net = skb_net(skb); + rcu_read_lock(); - svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, - &iph->daddr, uh->dest); + if (likely(!ip_vs_iph_inverse(iph))) + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->daddr, ports[1]); + else + svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, + &iph->saddr, ports[0]); + if (svc) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. 
* We have to drop this packet :( @@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct net *net, struct ip_vs_app *inc) +static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); hash = udp_app_hashkey(port); @@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc) static void -udp_unregister_app(struct net *net, struct ip_vs_app *inc) +udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); atomic_dec(&pd->appcnt); list_del_rcu(&inc->p_list); @@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc) static int udp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); + struct netns_ipvs *ipvs = cp->ipvs; int hash; struct ip_vs_app *inc; int result = 0; @@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; } -static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) +static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { - struct netns_ipvs *ipvs = net_ipvs(net); - ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, sizeof(udp_timeouts)); @@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd) return 0; } -static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) +static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) { kfree(pd->timeout_table); } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 98a13433b68c..1e373a5e44e3 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, static inline __be16 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - __be16 port; - struct tcphdr _tcph, *th; - struct udphdr _udph, *uh; - sctp_sctphdr_t _sctph, *sh; + __be16 _ports[2], *ports; + /* At this point we know that we have a valid packet of some kind. + * Because ICMP packets are only guaranteed to have the first 8 + * bytes, let's just grab the ports. Fortunately they're in the + * same position for all three of the protocols we care about. 
+ */ switch (iph->protocol) { case IPPROTO_TCP: - th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); - if (unlikely(th == NULL)) - return 0; - port = th->source; - break; case IPPROTO_UDP: - uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); - if (unlikely(uh == NULL)) - return 0; - port = uh->source; - break; case IPPROTO_SCTP: - sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (unlikely(sh == NULL)) + ports = skb_header_pointer(skb, iph->len, sizeof(_ports), + &_ports); + if (unlikely(!ports)) return 0; - port = sh->source; - break; + + if (likely(!ip_vs_iph_inverse(iph))) + return ports[0]; + else + return ports[1]; default: - port = 0; + return 0; } - - return port; } @@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; + const union nf_inet_addr *hash_addr; + + hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); @@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) - dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); else - dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + dest = ip_vs_sh_get(svc, s, hash_addr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); @@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph->saddr), + IP_VS_DBG_ADDR(svc->af, hash_addr), IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 43f140950075..803001a45aa1 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -193,7 +193,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { - struct net *net; + struct netns_ipvs *ipvs; struct socket *sock; char *buf; int id; @@ -533,10 +533,9 @@ set: * Version 0 , could be switched in by sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, +static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; struct ip_vs_sync_buff *buff; @@ -615,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp, pkts); + ip_vs_sync_conn(ipvs, cp, pkts); } } @@ -624,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, * Called by ip_vs_in. 
* Sending Version 1 messages */ -void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) +void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; struct ip_vs_sync_buff *buff; @@ -637,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts) /* Handle old version of the protocol */ if (sysctl_sync_ver(ipvs) == 0) { - ip_vs_sync_conn_v0(net, cp, pkts); + ip_vs_sync_conn_v0(ipvs, cp, pkts); return; } /* Do not sync ONE PACKET */ @@ -784,21 +782,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(net, af, sc->v6.protocol, + ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(net, af, sc->v4.protocol, + ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -837,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, +static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param, unsigned int flags, unsigned int state, unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, @@ -846,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) { cp = ip_vs_conn_in_get(param); @@ -904,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * with synchronization, so we can make the assumption that * the svc_af is the same as the dest_af */ - dest = ip_vs_find_dest(net, type, type, daddr, dport, + dest = ip_vs_find_dest(ipvs, type, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark, flags); @@ -941,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, } else { struct ip_vs_proto_data *pd; - pd = ip_vs_proto_data_get(net, protocol); + pd = ip_vs_proto_data_get(ipvs, protocol); if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) cp->timeout = pd->timeout_table[state]; else @@ -953,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, /* * Process received multicast message for Version 0 */ -static void ip_vs_process_message_v0(struct net *net, const char *buffer, +static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer, const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; @@ -1009,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, } } - ip_vs_conn_fill_param(net, AF_INET, s->protocol, + ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, s->vport, &param); /* Send timeout as
Zero */ - ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, 0, 0, opt); } @@ -1067,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1171,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) state = 0; } } - if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data, + if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data, pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) @@ -1204,10 +1201,9 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(struct net *net, __u8 *buffer, +static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer, const size_t buflen) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; @@ -1257,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* Process a single sync_conn */ - retc = ip_vs_proc_sync_conn(net, p, msg_end); + retc = ip_vs_proc_sync_conn(ipvs, p, msg_end); if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); @@ -1268,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, } } else { /* Old type of message */ - ip_vs_process_message_v0(net, buffer, buflen); + ip_vs_process_message_v0(ipvs, buffer, buflen); return; } } @@ -1493,16 +1489,15 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen, /* * Set up sending multicast socket over UDP */ -static struct socket *make_send_sock(struct net *net, int id) +static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id) { - struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ - result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM, + result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); @@ -1550,16 +1545,15 @@ error: /* * Set up receiving multicast socket over UDP */ -static struct socket *make_receive_sock(struct net *net, int id) +static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id) { - struct netns_ipvs *ipvs = net_ipvs(net); /* multicast addr */ union ipvs_sockaddr mcast_addr; struct socket *sock; int result, salen; /* First create a socket */ - result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM, +
result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { pr_err("Error during creation of socket; terminating\n"); @@ -1687,7 +1681,7 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms) static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; - struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct netns_ipvs *ipvs = tinfo->ipvs; struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id]; struct sock *sk = tinfo->sock->sk; struct ip_vs_sync_buff *sb; @@ -1743,7 +1737,7 @@ done: static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; - struct netns_ipvs *ipvs = net_ipvs(tinfo->net); + struct netns_ipvs *ipvs = tinfo->ipvs; int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " @@ -1765,7 +1759,7 @@ static int sync_thread_backup(void *data) break; } - ip_vs_process_message(tinfo->net, tinfo->buf, len); + ip_vs_process_message(ipvs, tinfo->buf, len); } } @@ -1778,13 +1772,12 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, +int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, int state) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **array = NULL, *task; struct socket *sock; - struct netns_ipvs *ipvs = net_ipvs(net); struct net_device *dev; char *name; int (*threadfn)(void *data); @@ -1811,7 +1804,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, if (!c->mcast_ttl) c->mcast_ttl = 1; - dev = __dev_get_by_name(net, c->mcast_ifn); + dev = __dev_get_by_name(ipvs->net, c->mcast_ifn); if (!dev) { pr_err("Unknown mcast interface: %s\n", c->mcast_ifn); return -ENODEV; @@ -1873,9 +1866,9 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, tinfo = NULL; for (id = 0; id < count; id++) { if (state == IP_VS_STATE_MASTER) - sock = make_send_sock(net, id); + sock = make_send_sock(ipvs, id); else - sock = make_receive_sock(net, id); + sock = make_receive_sock(ipvs, id); if (IS_ERR(sock)) { result = PTR_ERR(sock); goto outtinfo; @@ -1883,7 +1876,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c, tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) goto outsocket; - tinfo->net = net; + tinfo->ipvs = ipvs; tinfo->sock = sock; if (state == IP_VS_STATE_BACKUP) { tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen, @@ -1947,9 +1940,8 @@ out: } -int stop_sync_thread(struct net *net, int state) +int stop_sync_thread(struct netns_ipvs *ipvs, int state) { - struct netns_ipvs *ipvs = net_ipvs(net); struct task_struct **array; int id; int retc = -EINVAL; @@ -2015,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -int __net_init ip_vs_sync_net_init(struct net *net) +int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs) { - struct netns_ipvs *ipvs = net_ipvs(net); - __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key); spin_lock_init(&ipvs->sync_lock); spin_lock_init(&ipvs->sync_buff_lock); return 0; } -void ip_vs_sync_net_cleanup(struct net *net) +void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs) { int retc; - struct netns_ipvs *ipvs = net_ipvs(net); mutex_lock(&ipvs->sync_mutex); - retc = stop_sync_thread(net, IP_VS_STATE_MASTER); + retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER); if (retc && retc != -ESRCH) pr_err("Failed to stop Master Daemon\n"); - retc = stop_sync_thread(net, IP_VS_STATE_BACKUP); 
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP); if (retc && retc != -ESRCH) pr_err("Failed to stop Backup Daemon\n"); mutex_unlock(&ipvs->sync_mutex); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 258a0b0e82a2..3264cb49b333 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -212,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); } -static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, +static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, + int rt_mode, struct ip_vs_iphdr *ipvsh, struct sk_buff *skb, int mtu) { #ifdef CONFIG_IP_VS_IPV6 if (skb_af == AF_INET6) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { if (!skb->dev) skb->dev = net->loopback_dev; /* only send ICMP too big on first fragment */ - if (!ipvsh->fragoffs) + if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh)) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); @@ -233,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, } else #endif { - struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); - /* If we're going to tunnel the packet and pmtu discovery * is disabled, we'll just fragment it anyway */ @@ -242,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, return true; if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) && - skb->len > mtu && !skb_is_gso(skb))) { + skb->len > mtu && !skb_is_gso(skb) && + !ip_vs_iph_icmp(ipvsh))) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG(1, "frag needed for %pI4\n", @@ -256,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode, /* Get route to destination or remote server */ static int -__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + struct ip_vs_dest *dest, __be32 daddr, int rt_mode, __be32 *ret_saddr, struct ip_vs_iphdr *ipvsh) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rtable *rt; /* Route to the other host */ int mtu; @@ -336,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, maybe_update_pmtu(skb_af, skb, mtu); } - if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) + if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; skb_dst_drop(skb); @@ -402,11 +403,12 @@ out_err: * Get route to destination or remote server */ static int -__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, +__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, + struct ip_vs_dest *dest, struct in6_addr *daddr, struct in6_addr *ret_saddr, struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode) { - struct net *net = dev_net(skb_dst(skb)->dev); + struct net *net = ipvs->net; struct ip_vs_dest_dst *dest_dst; struct rt6_info *rt; /* Route to the other host */ struct dst_entry *dst; @@ -484,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, maybe_update_pmtu(skb_af, skb, mtu); } - if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu)) + if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu)) goto err_put; 
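[The __ip_vs_get_out_rt()/__ip_vs_get_out_rt_v6() hunks here repeat the conversion applied throughout this series: instead of re-deriving the namespace from packet state with dev_net(skb_dst(skb)->dev) or skb_net(skb), callers pass down the struct netns_ipvs they already hold and reach struct net through ipvs->net. A minimal sketch of the two shapes; example_rt_old()/example_rt_new() are hypothetical stand-ins, not functions from the patch:

	/* old shape: the callee guesses the netns from the skb; needs a valid dst */
	static bool example_rt_old(struct sk_buff *skb)
	{
		struct net *net = dev_net(skb_dst(skb)->dev);

		return net_ipvs(net) != NULL;	/* placeholder for real routing work */
	}

	/* new shape: the caller hands netns_ipvs down; struct net hangs off it */
	static bool example_rt_new(struct netns_ipvs *ipvs, struct sk_buff *skb)
	{
		struct net *net = ipvs->net;	/* valid even before a dst is attached */

		return net != NULL;		/* placeholder for real routing work */
	}
]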
skb_dst_drop(skb); @@ -573,8 +575,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; @@ -595,8 +597,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, skb_forward_csum(skb); if (!skb->sk) skb_sender_cpu_clear(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, - NULL, skb_dst(skb)->dev, dst_output_sk); + NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output); } else ret = NF_ACCEPT; return ret; @@ -629,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr, + if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; @@ -656,10 +658,13 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { + struct ipv6hdr *iph = ipv6_hdr(skb); + EnterFunction(10); rcu_read_lock(); - if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL, + if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, + &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) goto tx_error; @@ -706,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } was_input = rt_is_input_route(skb_rtable(skb)); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR, NULL, ipvsh); @@ -723,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off, "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error; @@ -733,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? 
*/ if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) { - IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " - "stopping DNAT to loopback address"); + IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off, + "ip_vs_nat_xmit(): stopping DNAT to loopback " + "address"); goto tx_error; } @@ -751,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); - IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -794,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -812,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); goto tx_error; @@ -823,7 +830,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) { - IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, + IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); goto tx_error; @@ -841,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; ipv6_hdr(skb)->daddr = cp->daddr.in6; - IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -967,8 +974,8 @@ int ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh) { - struct net *net = skb_net(skb); - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = cp->ipvs; + struct net *net = ipvs->net; struct rtable *rt; /* Route to the other host */ __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ @@ -984,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_CONNECT | @@ -1042,7 +1049,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip_local_out(skb); + ip_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); rcu_read_unlock(); @@ -1078,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, &saddr, ipvsh, 1, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | 
@@ -1133,7 +1141,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ret = ip_vs_tunnel_xmit_prepare(skb, cp); if (ret == NF_ACCEPT) - ip6_local_out(skb); + ip6_local_out(cp->ipvs->net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); rcu_read_unlock(); @@ -1165,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); @@ -1204,7 +1212,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1273,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, + local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, NULL, iph); if (local < 0) goto tx_error; @@ -1365,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); - local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6, - NULL, ipvsh, 0, rt_mode); + local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, + &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; rt = (struct rt6_info *) skb_dst(skb); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c09d6c7198f6..3cb3cb831591 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -168,6 +168,7 @@ nf_ct_get_tuple(const struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, + struct net *net, struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) @@ -181,12 +182,13 @@ nf_ct_get_tuple(const struct sk_buff *skb, tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL; - return l4proto->pkt_to_tuple(skb, dataoff, tuple); + return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuple); bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, - u_int16_t l3num, struct nf_conntrack_tuple *tuple) + u_int16_t l3num, + struct net *net, struct nf_conntrack_tuple *tuple) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; @@ -205,7 +207,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, l4proto = __nf_ct_l4proto_find(l3num, protonum); - ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple, + ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, l3proto, l4proto); rcu_read_unlock(); @@ -938,10 +940,13 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } timeout_ext = tmpl ? 
nf_ct_timeout_find(tmpl) : NULL; - if (timeout_ext) - timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext); - else + if (timeout_ext) { + timeouts = nf_ct_timeout_data(timeout_ext); + if (unlikely(!timeouts)) + timeouts = l4proto->get_timeouts(net); + } else { timeouts = l4proto->get_timeouts(net); + } if (!l4proto->new(ct, skb, dataoff, timeouts)) { nf_conntrack_free(ct); @@ -950,7 +955,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } if (timeout_ext) - nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC); + nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout), + GFP_ATOMIC); nf_ct_acct_ext_add(ct, GFP_ATOMIC); nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); @@ -1029,7 +1035,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, u32 hash; if (!nf_ct_get_tuple(skb, skb_network_offset(skb), - dataoff, l3num, protonum, &tuple, l3proto, + dataoff, l3num, protonum, net, &tuple, l3proto, l4proto)) { pr_debug("resolve_normal_ct: Can't get tuple\n"); return NULL; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 94a66541e0b7..9f5272968abb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2133,9 +2133,9 @@ ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct, struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *mask); -#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT +#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT static size_t -ctnetlink_nfqueue_build_size(const struct nf_conn *ct) +ctnetlink_glue_build_size(const struct nf_conn *ct) { return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ @@ -2162,8 +2162,19 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct) ; } -static int -ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) +static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb, + enum ip_conntrack_info *ctinfo) +{ + struct nf_conn *ct; + + ct = nf_ct_get(skb, ctinfo); + if (ct && nf_ct_is_untracked(ct)) + ct = NULL; + + return ct; +} + +static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; @@ -2236,7 +2247,32 @@ nla_put_failure: } static int -ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) +ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u_int16_t ct_attr, u_int16_t ct_info_attr) +{ + struct nlattr *nest_parms; + + nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + if (__ctnetlink_glue_build(skb, ct) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + + if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static int +ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) { int err; @@ -2276,7 +2312,7 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) } static int -ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) +ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct) { struct nlattr *cda[CTA_MAX+1]; int ret; @@ -2286,16 +2322,16 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) return ret; spin_lock_bh(&nf_conntrack_expect_lock); - ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); + ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct); 
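[The ctnetlink_glue_build() wrapper introduced just above follows the standard nested netlink attribute pattern: open a nest for the conntrack payload, let __ctnetlink_glue_build() fill it, close the nest, then emit ctinfo as a sibling attribute. A condensed sketch, assuming the caller supplies the attribute ids the way NFQA_CT/NFQA_CT_INFO and NFULA_CT/NFULA_CT_INFO do; glue_build_sketch() is an illustrative name:

	static int glue_build_sketch(struct sk_buff *skb, u16 ct_attr,
				     u16 ct_info_attr, u32 ctinfo)
	{
		struct nlattr *nest = nla_nest_start(skb, ct_attr | NLA_F_NESTED);

		if (!nest)
			return -ENOSPC;
		/* __ctnetlink_glue_build() fills tuples, status, etc. here */
		nla_nest_end(skb, nest);

		/* ctinfo travels as a sibling attribute, not inside the nest */
		if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo)))
			return -ENOSPC;
		return 0;
	}
]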
spin_unlock_bh(&nf_conntrack_expect_lock); return ret; } -static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, - const struct nf_conn *ct, - struct nf_conntrack_tuple *tuple, - struct nf_conntrack_tuple *mask) +static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda, + const struct nf_conn *ct, + struct nf_conntrack_tuple *tuple, + struct nf_conntrack_tuple *mask) { int err; @@ -2309,8 +2345,8 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda, } static int -ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, - u32 portid, u32 report) +ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, + u32 portid, u32 report) { struct nlattr *cda[CTA_EXPECT_MAX+1]; struct nf_conntrack_tuple tuple, mask; @@ -2322,8 +2358,8 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, if (err < 0) return err; - err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda, - ct, &tuple, &mask); + err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda, + ct, &tuple, &mask); if (err < 0) return err; @@ -2350,14 +2386,24 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, return 0; } -static struct nfq_ct_hook ctnetlink_nfqueue_hook = { - .build_size = ctnetlink_nfqueue_build_size, - .build = ctnetlink_nfqueue_build, - .parse = ctnetlink_nfqueue_parse, - .attach_expect = ctnetlink_nfqueue_attach_expect, - .seq_adjust = nf_ct_tcp_seqadj_set, +static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ + if (!(ct->status & IPS_NAT_MASK)) + return; + + nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff); +} + +static struct nfnl_ct_hook ctnetlink_glue_hook = { + .get_ct = ctnetlink_glue_get_ct, + .build_size = ctnetlink_glue_build_size, + .build = ctnetlink_glue_build, + .parse = ctnetlink_glue_parse, + .attach_expect = ctnetlink_glue_attach_expect, + .seq_adjust = ctnetlink_glue_seqadj, }; -#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ +#endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT */ /*********************************************************************** * EXPECT @@ -3341,9 +3387,9 @@ static int __init ctnetlink_init(void) pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } -#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT +#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT /* setup interaction between nf_queue and nf_conntrack_netlink. 
*/ - RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook); + RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook); #endif return 0; @@ -3362,8 +3408,8 @@ static void __exit ctnetlink_exit(void) unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); -#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT - RCU_INIT_POINTER(nfq_ct_hook, NULL); +#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT + RCU_INIT_POINTER(nfnl_ct_hook, NULL); #endif } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 6dd995c7c72b..fce1b1cca32d 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -398,7 +398,7 @@ static inline struct dccp_net *dccp_pernet(struct net *net) } static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { struct dccp_hdr _hdr, *dh; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 2281be419a74..86dc752e5349 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -45,7 +45,7 @@ static inline struct nf_generic_net *generic_pernet(struct net *net) static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { tuple->src.u.all = 0; tuple->dst.u.all = 0; diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 7648674f29c3..a96451a7af20 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -190,9 +190,8 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, /* gre hdr info to tuple */ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { - struct net *net = dev_net(skb->dev ? 
skb->dev : skb_dst(skb)->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 67197731eb68..9578a7c371ef 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -156,7 +156,7 @@ static inline struct sctp_net *sctp_pernet(struct net *net) } static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct sctphdr *hp; struct sctphdr _hdr; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 70383de72054..278f3b9356ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -277,7 +277,7 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net) } static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conntrack_tuple *tuple) + struct net *net, struct nf_conntrack_tuple *tuple) { const struct tcphdr *hp; struct tcphdr _hdr; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 6957281ffee5..478f92f834b6 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -38,6 +38,7 @@ static inline struct nf_udp_net *udp_pernet(struct net *net) static bool udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct udphdr *hp; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index c5903d1649f9..1ac8ee13a873 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -48,6 +48,7 @@ static inline struct udplite_net *udplite_pernet(struct net *net) static bool udplite_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct net *net, struct nf_conntrack_tuple *tuple) { const struct udphdr *hp; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 5113dfd39df9..06a9f45771ab 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -83,7 +83,7 @@ out: rcu_read_unlock(); } -int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) +int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) { struct flowi fl; unsigned int hh_len; @@ -99,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0); + dst = xfrm_lookup(net, dst, &fl, skb->sk, 0); if (IS_ERR(dst)) return PTR_ERR(dst); diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 97b75f9bfbcd..d43869879fcf 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -55,7 +55,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb, rcu_read_lock(); indev = __in_dev_get_rcu(skb->dev); - if (indev != NULL) { + if (indev && indev->ifa_list) { ifa = indev->ifa_list; newdst = ifa->ifa_local; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96777f9a9350..5baa8e24e6ac 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -69,19 +69,14 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(physdev); } #endif - /* Drop reference to owner of hook which queued us. 
*/ - module_put(entry->elem->owner); } EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ -bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) +void nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; - if (!try_module_get(entry->elem->owner)) - return false; - if (state->in) dev_hold(state->in); if (state->out) @@ -100,8 +95,6 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(physdev); } #endif - - return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -131,22 +124,20 @@ int nf_queue(struct sk_buff *skb, const struct nf_queue_handler *qh; /* QUEUE == DROP if no one is waiting, to be safe. */ - rcu_read_lock(); - qh = rcu_dereference(queue_handler); if (!qh) { status = -ESRCH; - goto err_unlock; + goto err; } afinfo = nf_get_afinfo(state->pf); if (!afinfo) - goto err_unlock; + goto err; entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); if (!entry) { status = -ENOMEM; - goto err_unlock; + goto err; } *entry = (struct nf_queue_entry) { @@ -156,16 +147,11 @@ int nf_queue(struct sk_buff *skb, .size = sizeof(*entry) + afinfo->route_key_size, }; - if (!nf_queue_entry_get_refs(entry)) { - status = -ECANCELED; - goto err_unlock; - } + nf_queue_entry_get_refs(entry); skb_dst_force(skb); afinfo->saveroute(skb, entry); status = qh->outfn(entry, queuenum); - rcu_read_unlock(); - if (status < 0) { nf_queue_entry_release_refs(entry); goto err; @@ -173,8 +159,6 @@ int nf_queue(struct sk_buff *skb, return 0; -err_unlock: - rcu_read_unlock(); err: kfree(entry); return status; @@ -187,19 +171,15 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) const struct nf_afinfo *afinfo; int err; - rcu_read_lock(); - nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... 
*/ - if (verdict == NF_REPEAT) { - elem = list_entry(elem->list.prev, struct nf_hook_ops, list); - verdict = NF_ACCEPT; - } + if (verdict == NF_REPEAT) + verdict = elem->hook(elem->priv, skb, &entry->state); if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->state.pf); - if (!afinfo || afinfo->reroute(skb, entry) < 0) + if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0) verdict = NF_DROP; } @@ -215,15 +195,13 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) case NF_ACCEPT: case NF_STOP: local_bh_disable(); - entry->state.okfn(entry->state.sk, skb); + entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: err = nf_queue(skb, elem, &entry->state, verdict >> NF_VERDICT_QBITS); if (err < 0) { - if (err == -ECANCELED) - goto next_hook; if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) goto next_hook; @@ -235,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) default: kfree_skb(skb); } - rcu_read_unlock(); + kfree(entry); } EXPORT_SYMBOL(nf_reinject); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 4a41eb92bcc0..93cc4737018f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1433,7 +1433,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, for (i = 0; i < afi->nops; i++) { ops = &basechain->ops[i]; ops->pf = family; - ops->owner = afi->owner; ops->hooknum = hooknum; ops->priority = priority; ops->priv = chain; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 05d0b03530f6..f3695a497408 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -48,9 +48,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt, const struct nft_chain *chain, int rulenum, enum nft_trace type) { - struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); - - nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ", chain->table->name, chain->name, comments[type], rulenum); @@ -111,10 +109,10 @@ struct nft_jumpstack { }; unsigned int -nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) +nft_do_chain(struct nft_pktinfo *pkt, void *priv) { - const struct nft_chain *chain = ops->priv, *basechain = chain; - const struct net *net = dev_net(pkt->in ? 
pkt->in : pkt->out); + const struct nft_chain *chain = priv, *basechain = chain; + const struct net *net = pkt->net; const struct nft_rule *rule; const struct nft_expr *expr, *last; struct nft_regs regs; diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 2cae4d4a03b7..7b9c053ba750 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -17,13 +17,13 @@ static inline void nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, + struct sk_buff *skb, const struct nf_hook_state *state) { struct iphdr *iph, _iph; u32 len, thoff; - nft_set_pktinfo(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), &_iph); @@ -48,7 +48,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, static inline void __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -82,33 +81,32 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, } static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - nft_set_pktinfo(pkt, ops, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state); + nft_set_pktinfo(pkt, skb, state); + __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); } static unsigned int -nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb, +nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct nft_pktinfo pkt; switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state); + nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state); + nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, ops, skb, state); + nft_set_pktinfo(&pkt, skb, state); break; } - return nft_do_chain(&pkt, ops); + return nft_do_chain(&pkt, priv); } static struct nft_af_info nft_af_netdev __read_mostly = { diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 70277b11f742..46453ab318db 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -64,7 +64,7 @@ void nfnl_unlock(__u8 subsys_id) EXPORT_SYMBOL_GPL(nfnl_unlock); #ifdef CONFIG_PROVE_LOCKING -int lockdep_nfnl_is_held(u8 subsys_id) +bool lockdep_nfnl_is_held(u8 subsys_id) { return lockdep_is_held(&table[subsys_id].mutex); } @@ -492,7 +492,7 @@ static int nfnetlink_bind(struct net *net, int group) type = nfnl_group2type[group]; rcu_read_lock(); - ss = nfnetlink_get_subsys(type); + ss = nfnetlink_get_subsys(type << 8); rcu_read_unlock(); if (!ss) request_module("nfnetlink-subsys-%d", type); diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 476accd17145..c7a2d0e1c462 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -291,6 +291,34 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, return ret; } +static void untimeout(struct nf_conntrack_tuple_hash *i, + struct ctnl_timeout *timeout) +{ + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); + struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); + + if (timeout_ext && (!timeout || timeout_ext->timeout == timeout)) + RCU_INIT_POINTER(timeout_ext->timeout, NULL); +} + +static void 
ctnl_untimeout(struct ctnl_timeout *timeout) +{ + struct nf_conntrack_tuple_hash *h; + const struct hlist_nulls_node *nn; + int i; + + local_bh_disable(); + for (i = 0; i < init_net.ct.htable_size; i++) { + spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + if (i < init_net.ct.htable_size) { + hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode) + untimeout(h, timeout); + } + spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + } + local_bh_enable(); +} + /* try to delete object, fail if it is still in use. */ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) { @@ -301,6 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout) /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_l4proto_put(timeout->l4proto); + ctnl_untimeout(timeout); kfree_rcu(timeout, rcu_head); } else { /* still in use, restore reference counter. */ @@ -567,6 +596,10 @@ static void __exit cttimeout_exit(void) pr_info("cttimeout: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&cttimeout_subsys); + + /* Make sure no conntrack objects refer to custom timeouts anymore. */ + ctnl_untimeout(NULL); + list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) { list_del_rcu(&cur->head); /* We are sure that our objects have no clients at this point, @@ -579,6 +612,7 @@ static void __exit cttimeout_exit(void) RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL); RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL); #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ + rcu_barrier(); } module_init(cttimeout_init); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 4670821b569d..06eb48fceb42 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -27,6 +27,7 @@ #include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> +#include <linux/netfilter/nf_conntrack_common.h> #include <linux/spinlock.h> #include <linux/sysctl.h> #include <linux/proc_fs.h> @@ -401,7 +402,9 @@ __build_packet_message(struct nfnl_log_net *log, unsigned int hooknum, const struct net_device *indev, const struct net_device *outdev, - const char *prefix, unsigned int plen) + const char *prefix, unsigned int plen, + const struct nfnl_ct_hook *nfnl_ct, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct nfulnl_msg_packet_hdr pmsg; struct nlmsghdr *nlh; @@ -538,9 +541,9 @@ __build_packet_message(struct nfnl_log_net *log, if (skb->tstamp.tv64) { struct nfulnl_msg_packet_timestamp ts; - struct timeval tv = ktime_to_timeval(skb->tstamp); - ts.sec = cpu_to_be64(tv.tv_sec); - ts.usec = cpu_to_be64(tv.tv_usec); + struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + ts.sec = cpu_to_be64(kts.tv_sec); + ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts)) goto nla_put_failure; @@ -575,6 +578,10 @@ __build_packet_message(struct nfnl_log_net *log, htonl(atomic_inc_return(&log->global_seq)))) goto nla_put_failure; + if (ct && nfnl_ct->build(inst->skb, ct, ctinfo, + NFULA_CT, NFULA_CT_INFO) < 0) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; int size = nla_attr_size(data_len); @@ -620,12 +627,16 @@ nfulnl_log_packet(struct net *net, const struct nf_loginfo *li_user, const char *prefix) { - unsigned int size, data_len; + size_t size; + unsigned int data_len; struct nfulnl_instance *inst; const struct nf_loginfo *li; unsigned int qthreshold; unsigned int plen; struct nfnl_log_net *log = nfnl_log_pernet(net); + const struct 
nfnl_ct_hook *nfnl_ct = NULL; + struct nf_conn *ct = NULL; + enum ip_conntrack_info uninitialized_var(ctinfo); if (li_user && li_user->type == NF_LOG_TYPE_ULOG) li = li_user; @@ -671,6 +682,14 @@ nfulnl_log_packet(struct net *net, size += nla_total_size(sizeof(u_int32_t)); if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) size += nla_total_size(sizeof(u_int32_t)); + if (inst->flags & NFULNL_CFG_F_CONNTRACK) { + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (nfnl_ct != NULL) { + ct = nfnl_ct->get_ct(skb, &ctinfo); + if (ct != NULL) + size += nfnl_ct->build_size(ct); + } + } qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ @@ -715,7 +734,8 @@ nfulnl_log_packet(struct net *net, inst->qlen++; __build_packet_message(log, inst, skb, data_len, pf, - hooknum, in, out, prefix, plen); + hooknum, in, out, prefix, plen, + nfnl_ct, ct, ctinfo); if (inst->qlen >= qthreshold) __nfulnl_flush(inst); @@ -805,6 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct net *net = sock_net(ctnl); struct nfnl_log_net *log = nfnl_log_pernet(net); int ret = 0; + u16 flags; if (nfula[NFULA_CFG_CMD]) { u_int8_t pf = nfmsg->nfgen_family; @@ -826,6 +847,28 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, goto out_put; } + /* Check if we support these flags in first place, dependencies should + * be there too not to break atomicity. + */ + if (nfula[NFULA_CFG_FLAGS]) { + flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS])); + + if ((flags & NFULNL_CFG_F_CONNTRACK) && + !rcu_access_pointer(nfnl_ct_hook)) { +#ifdef CONFIG_MODULES + nfnl_unlock(NFNL_SUBSYS_ULOG); + request_module("ip_conntrack_netlink"); + nfnl_lock(NFNL_SUBSYS_ULOG); + if (rcu_access_pointer(nfnl_ct_hook)) { + ret = -EAGAIN; + goto out_put; + } +#endif + ret = -EOPNOTSUPP; + goto out_put; + } + } + if (cmd != NULL) { switch (cmd->command) { case NFULNL_CFG_CMD_BIND: @@ -854,16 +897,15 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -ENOTSUPP; break; } + } else if (!inst) { + ret = -ENODEV; + goto out; } if (nfula[NFULA_CFG_MODE]) { - struct nfulnl_msg_config_mode *params; - params = nla_data(nfula[NFULA_CFG_MODE]); + struct nfulnl_msg_config_mode *params = + nla_data(nfula[NFULA_CFG_MODE]); - if (!inst) { - ret = -ENODEV; - goto out; - } nfulnl_set_mode(inst, params->copy_mode, ntohl(params->copy_range)); } @@ -871,42 +913,23 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, if (nfula[NFULA_CFG_TIMEOUT]) { __be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]); - if (!inst) { - ret = -ENODEV; - goto out; - } nfulnl_set_timeout(inst, ntohl(timeout)); } if (nfula[NFULA_CFG_NLBUFSIZ]) { __be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]); - if (!inst) { - ret = -ENODEV; - goto out; - } nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); } if (nfula[NFULA_CFG_QTHRESH]) { __be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]); - if (!inst) { - ret = -ENODEV; - goto out; - } nfulnl_set_qthresh(inst, ntohl(qthresh)); } - if (nfula[NFULA_CFG_FLAGS]) { - __be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]); - - if (!inst) { - ret = -ENODEV; - goto out; - } - nfulnl_set_flags(inst, ntohs(flags)); - } + if (nfula[NFULA_CFG_FLAGS]) + nfulnl_set_flags(inst, flags); out_put: instance_put(inst); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue.c index a5cd6d90b78b..7d81d280cb4f 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue.c @@ -28,12 +28,12 @@ #include <linux/netfilter_bridge.h> #include 
<linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_queue.h> +#include <linux/netfilter/nf_conntrack_common.h> #include <linux/list.h> #include <net/sock.h> #include <net/tcp_states.h> #include <net/netfilter/nf_queue.h> #include <net/netns/generic.h> -#include <net/netfilter/nfnetlink_queue.h> #include <linux/atomic.h> @@ -313,6 +313,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + struct nfnl_ct_hook *nfnl_ct; bool csum_verify; char *secdata = NULL; u32 seclen = 0; @@ -364,8 +365,14 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, break; } - if (queue->flags & NFQA_CFG_F_CONNTRACK) - ct = nfqnl_ct_get(entskb, &size, &ctinfo); + if (queue->flags & NFQA_CFG_F_CONNTRACK) { + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (nfnl_ct != NULL) { + ct = nfnl_ct->get_ct(entskb, &ctinfo); + if (ct != NULL) + size += nfnl_ct->build_size(ct); + } + } if (queue->flags & NFQA_CFG_F_UID_GID) { size += (nla_total_size(sizeof(u_int32_t)) /* uid */ @@ -493,9 +500,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; - struct timeval tv = ktime_to_timeval(entskb->tstamp); - ts.sec = cpu_to_be64(tv.tv_sec); - ts.usec = cpu_to_be64(tv.tv_usec); + struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + + ts.sec = cpu_to_be64(kts.tv_sec); + ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts)) goto nla_put_failure; @@ -508,7 +516,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) goto nla_put_failure; - if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) goto nla_put_failure; if (cap_len > data_len && @@ -598,12 +606,9 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) { - if (nf_queue_entry_get_refs(entry)) - return entry; - kfree(entry); - } - return NULL; + if (entry) + nf_queue_entry_get_refs(entry); + return entry; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) @@ -670,8 +675,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct nfqnl_instance *queue; struct sk_buff *skb, *segs; int err = -ENOBUFS; - struct net *net = dev_net(entry->state.in ? - entry->state.in : entry->state.out); + struct net *net = entry->state.net; struct nfnl_queue_net *q = nfnl_queue_pernet(net); /* rcu_read_lock()ed by nf_hook_slow() */ @@ -699,7 +703,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) nf_bridge_adjust_skb_data(skb); segs = skb_gso_segment(skb, 0); /* Does not use PTR_ERR to limit the number of error codes that can be - * returned by nf_queue. For instance, callers rely on -ECANCELED to + * returned by nf_queue. For instance, callers rely on -ESRCH to * mean 'ignore this hook'. 
*/ if (IS_ERR_OR_NULL(segs)) @@ -1002,6 +1006,28 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, return 0; } +static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct, + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[], + struct nf_queue_entry *entry, + enum ip_conntrack_info *ctinfo) +{ + struct nf_conn *ct; + + ct = nfnl_ct->get_ct(entry->skb, ctinfo); + if (ct == NULL) + return NULL; + + if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0) + return NULL; + + if (nfqa[NFQA_EXP]) + nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct, + NETLINK_CB(entry->skb).portid, + nlmsg_report(nlh)); + return ct; +} + static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, @@ -1015,6 +1041,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, unsigned int verdict; struct nf_queue_entry *entry; enum ip_conntrack_info uninitialized_var(ctinfo); + struct nfnl_ct_hook *nfnl_ct; struct nf_conn *ct = NULL; struct net *net = sock_net(ctnl); @@ -1038,12 +1065,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; if (nfqa[NFQA_CT]) { - ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); - if (ct && nfqa[NFQA_EXP]) { - nfqnl_attach_expect(ct, nfqa[NFQA_EXP], - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - } + /* rcu lock already held from nfnl->call_rcu. */ + nfnl_ct = rcu_dereference(nfnl_ct_hook); + if (nfnl_ct != NULL) + ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo); } if (nfqa[NFQA_PAYLOAD]) { @@ -1054,8 +1079,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, payload_len, entry, diff) < 0) verdict = NF_DROP; - if (ct) - nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff); + if (ct && diff) + nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff); } if (nfqa[NFQA_MARK]) diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c deleted file mode 100644 index 96cac50e0d12..000000000000 --- a/net/netfilter/nfnetlink_queue_ct.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/skbuff.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/nfnetlink_queue.h> -#include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nfnetlink_queue.h> - -struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, - enum ip_conntrack_info *ctinfo) -{ - struct nfq_ct_hook *nfq_ct; - struct nf_conn *ct; - - /* rcu_read_lock()ed by __nf_queue already. */ - nfq_ct = rcu_dereference(nfq_ct_hook); - if (nfq_ct == NULL) - return NULL; - - ct = nf_ct_get(entskb, ctinfo); - if (ct) { - if (!nf_ct_is_untracked(ct)) - *size += nfq_ct->build_size(ct); - else - ct = NULL; - } - return ct; -} - -struct nf_conn * -nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr, - enum ip_conntrack_info *ctinfo) -{ - struct nfq_ct_hook *nfq_ct; - struct nf_conn *ct; - - /* rcu_read_lock()ed by __nf_queue already. 
*/
-	nfq_ct = rcu_dereference(nfq_ct_hook);
-	if (nfq_ct == NULL)
-		return NULL;
-
-	ct = nf_ct_get(skb, ctinfo);
-	if (ct && !nf_ct_is_untracked(ct))
-		nfq_ct->parse(attr, ct);
-
-	return ct;
-}
-
-int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct,
-		 enum ip_conntrack_info ctinfo)
-{
-	struct nfq_ct_hook *nfq_ct;
-	struct nlattr *nest_parms;
-	u_int32_t tmp;
-
-	nfq_ct = rcu_dereference(nfq_ct_hook);
-	if (nfq_ct == NULL)
-		return 0;
-
-	nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED);
-	if (!nest_parms)
-		goto nla_put_failure;
-
-	if (nfq_ct->build(skb, ct) < 0)
-		goto nla_put_failure;
-
-	nla_nest_end(skb, nest_parms);
-
-	tmp = ctinfo;
-	if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp)))
-		goto nla_put_failure;
-
-	return 0;
-
-nla_put_failure:
-	return -1;
-}
-
-void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
-			 enum ip_conntrack_info ctinfo, int diff)
-{
-	struct nfq_ct_hook *nfq_ct;
-
-	nfq_ct = rcu_dereference(nfq_ct_hook);
-	if (nfq_ct == NULL)
-		return;
-
-	if ((ct->status & IPS_NAT_MASK) && diff)
-		nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
-}
-
-int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
-			u32 portid, u32 report)
-{
-	struct nfq_ct_hook *nfq_ct;
-
-	if (nf_ct_is_untracked(ct))
-		return 0;
-
-	nfq_ct = rcu_dereference(nfq_ct_hook);
-	if (nfq_ct == NULL)
-		return -EOPNOTSUPP;
-
-	return nfq_ct->attach_expect(attr, ct, portid, report);
-}
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index a13d6a386d63..319c22b4bca2 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -31,9 +31,8 @@ static void nft_log_eval(const struct nft_expr *expr,
 			 const struct nft_pktinfo *pkt)
 {
 	const struct nft_log *priv = nft_expr_priv(expr);
-	struct net *net = dev_net(pkt->in ?
pkt->in : pkt->out); - nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in, + nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in, pkt->out, &priv->loginfo, "%s", priv->prefix); } diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index cb2f13ebb5a6..9dfaf4d55ee0 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -31,6 +31,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; const struct net_device *in = pkt->in, *out = pkt->out; + struct sock *sk; u32 *dest = ®s->data[priv->dreg]; switch (priv->key) { @@ -42,7 +43,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - *dest = pkt->ops->pf; + *dest = pkt->pf; break; case NFT_META_L4PROTO: *dest = pkt->tprot; @@ -86,33 +87,35 @@ void nft_meta_get_eval(const struct nft_expr *expr, *(u16 *)dest = out->type; break; case NFT_META_SKUID: - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + sk = skb_to_full_sk(skb); + if (!sk || !sk_fullsock(sk)) goto err; - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket == NULL || - skb->sk->sk_socket->file == NULL) { - read_unlock_bh(&skb->sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket == NULL || + sk->sk_socket->file == NULL) { + read_unlock_bh(&sk->sk_callback_lock); goto err; } *dest = from_kuid_munged(&init_user_ns, - skb->sk->sk_socket->file->f_cred->fsuid); - read_unlock_bh(&skb->sk->sk_callback_lock); + sk->sk_socket->file->f_cred->fsuid); + read_unlock_bh(&sk->sk_callback_lock); break; case NFT_META_SKGID: - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + sk = skb_to_full_sk(skb); + if (!sk || !sk_fullsock(sk)) goto err; - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket == NULL || - skb->sk->sk_socket->file == NULL) { - read_unlock_bh(&skb->sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); + if (sk->sk_socket == NULL || + sk->sk_socket->file == NULL) { + read_unlock_bh(&sk->sk_callback_lock); goto err; } *dest = from_kgid_munged(&init_user_ns, - skb->sk->sk_socket->file->f_cred->fsgid); - read_unlock_bh(&skb->sk->sk_callback_lock); + sk->sk_socket->file->f_cred->fsgid); + read_unlock_bh(&sk->sk_callback_lock); break; #ifdef CONFIG_IP_ROUTE_CLASSID case NFT_META_RTCLASSID: { @@ -135,7 +138,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; } - switch (pkt->ops->pf) { + switch (pkt->pf) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) *dest = PACKET_MULTICAST; @@ -168,9 +171,10 @@ void nft_meta_get_eval(const struct nft_expr *expr, break; #ifdef CONFIG_CGROUP_NET_CLASSID case NFT_META_CGROUP: - if (skb->sk == NULL || !sk_fullsock(skb->sk)) + sk = skb_to_full_sk(skb); + if (!sk || !sk_fullsock(sk)) goto err; - *dest = skb->sk->sk_classid; + *dest = sk->sk_classid; break; #endif default: diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 96805d21d618..61d216eb7917 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -42,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr, queue = priv->queuenum + cpu % priv->queues_total; } else { queue = nfqueue_hash(pkt->skb, queue, - priv->queues_total, pkt->ops->pf, + priv->queues_total, pkt->pf, jhash_initval); } } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 635dbba93d01..759ca5248a3d 100644 --- a/net/netfilter/nft_reject_inet.c +++ 
b/net/netfilter/nft_reject_inet.c @@ -22,38 +22,37 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); - struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); - switch (pkt->ops->pf) { + switch (pkt->pf) { case NFPROTO_IPV4: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: nf_send_unreach(pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nf_send_reset(pkt->net, pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: nf_send_unreach(pkt->skb, nft_reject_icmp_code(priv->icmp_code), - pkt->ops->hooknum); + pkt->hook); break; } break; case NFPROTO_IPV6: switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code, + pkt->hook); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nf_send_reset6(pkt->net, pkt->skb, pkt->hook); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, + nf_send_unreach6(pkt->net, pkt->skb, nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + pkt->hook); break; } break; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 9b42b5ea6dcd..d4aaad747ea9 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1193,7 +1193,6 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) if (!(hook_mask & 1)) continue; ops[i].hook = fn; - ops[i].owner = table->me; ops[i].pf = table->af; ops[i].hooknum = hooknum; ops[i].priority = table->priority; diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index faf32d888198..e7ac07e53b59 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -171,6 +171,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, if (timeout_ext == NULL) ret = -ENOMEM; + rcu_read_unlock(); + return ret; + err_put_timeout: __xt_ct_tg_timeout_put(timeout); out: @@ -318,8 +321,10 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct) if (timeout_put) { timeout_ext = nf_ct_timeout_find(ct); - if (timeout_ext) + if (timeout_ext) { timeout_put(timeout_ext->timeout); + RCU_INIT_POINTER(timeout_ext->timeout, NULL); + } } rcu_read_unlock(); #endif diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index c13b79440ede..1763ab82bcd7 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -33,7 +33,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_log_info *loginfo = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index fb7497c928a0..a1fa2c800cb9 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -26,7 +26,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; - struct net *net = dev_net(par->in ? 
par->in : par->out); + struct net *net = par->net; li.type = NF_LOG_TYPE_ULOG; li.u.ulog.copy_len = info->len; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 8c02501a530f..b7c43def0dc6 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, return -1; if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); if (dst_mtu(skb_dst(skb)) <= minlen) { diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index fd980aa7715d..3eff7b67cdf2 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -31,8 +31,9 @@ static unsigned int tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; + int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif); + nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, oif); return XT_CONTINUE; } @@ -42,8 +43,9 @@ static unsigned int tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; + int oif = info->priv ? info->priv->oif : 0; - nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif); + nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, oif); return XT_CONTINUE; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d0c96c5ae29a..3ab591e73ec0 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -250,8 +250,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, * no such listener is found, or NULL if the TCP header is incomplete. */ static struct sock * -tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, - struct sock *sk) +tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr _hdr, *hp; @@ -267,7 +267,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk2 = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, laddr ? laddr : iph->daddr, hp->source, lport ? 
lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); @@ -290,7 +290,7 @@ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) } static unsigned int -tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, +tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, u_int32_t mark_mask, u_int32_t mark_value) { const struct iphdr *iph = ip_hdr(skb); @@ -305,7 +305,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, iph->daddr, hp->source, hp->dest, skb->dev, NFT_LOOKUP_ESTABLISHED); @@ -317,11 +317,11 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport, /* UDP has no TCP_TIME_WAIT state, so we never enter here */ if (sk && sk->sk_state == TCP_TIME_WAIT) /* reopening a TIME_WAIT connection needs special handling */ - sk = tproxy_handle_time_wait4(skb, laddr, lport, sk); + sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk); else if (!sk) /* no, there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + sk = nf_tproxy_get_sock_v4(net, iph->protocol, iph->saddr, laddr, hp->source, lport, skb->dev, NFT_LOOKUP_LISTENER); @@ -351,7 +351,7 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info *tgi = par->targinfo; - return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value); } static unsigned int @@ -359,7 +359,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tproxy_target_info_v1 *tgi = par->targinfo; - return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); + return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value); } #ifdef XT_TPROXY_HAVE_IPV6 @@ -429,7 +429,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, * to a listener socket if there's one */ struct sock *sk2; - sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk2 = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr), hp->source, @@ -472,7 +472,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) * addresses, this happens if the redirect already happened * and the current packet belongs to an already established * connection */ - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, &iph->daddr, hp->source, hp->dest, par->in, NFT_LOOKUP_ESTABLISHED); @@ -487,7 +487,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) else if (!sk) /* no there's no established connection, check if * there's a listener on the redirected addr/port */ - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + sk = nf_tproxy_get_sock_v6(par->net, tproto, &iph->saddr, laddr, hp->source, lport, par->in, NFT_LOOKUP_LISTENER); diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 5b4743cc0436..11d6091991a4 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev, static 
bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -143,7 +143,7 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 075d89d94d28..99bbc829868d 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -317,7 +317,7 @@ static int count_them(struct net *net, static bool connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; @@ -332,7 +332,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; zone = nf_ct_zone(ct); } else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - par->family, &tuple)) { + par->family, net, &tuple)) { goto hotdrop; } diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 8d47c3780fda..71a9d95e0a81 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -48,6 +48,7 @@ static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; + struct netns_ipvs *ipvs = net_ipvs(par->net); /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */ const u_int8_t family = par->family; struct ip_vs_iphdr iph; @@ -67,7 +68,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) goto out; } - ip_vs_fill_iph_skb(family, skb, &iph); + ip_vs_fill_iph_skb(family, skb, true, &iph); if (data->bitmask & XT_IPVS_PROTO) if ((iph.protocol == data->l4proto) ^ @@ -85,7 +86,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */); + cp = pp->conn_out_get(ipvs, family, skb, &iph); if (unlikely(cp == NULL)) { match = false; goto out; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 0778855ea5e7..df8801e02a32 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -200,7 +200,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) unsigned char opts[MAX_IPOPTLEN]; const struct xt_osf_finger *kf; const struct xt_osf_user_finger *f; - struct net *net = dev_net(p->in ? 
p->in : p->out); + struct net *net = p->net; if (!info) return false; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index ca2e577ed8ac..1302b475abcb 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -14,6 +14,7 @@ #include <linux/skbuff.h> #include <linux/file.h> #include <net/sock.h> +#include <net/inet_sock.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_owner.h> @@ -33,8 +34,9 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; + struct sock *sk = skb_to_full_sk(skb); - if (skb->sk == NULL || skb->sk->sk_socket == NULL) + if (sk == NULL || sk->sk_socket == NULL) return (info->match ^ info->invert) == 0; else if (info->match & info->invert & XT_OWNER_SOCKET) /* @@ -43,7 +45,7 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - filp = skb->sk->sk_socket->file; + filp = sk->sk_socket->file; if (filp == NULL) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 45e1b30e4fb2..d725a27743a1 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -237,7 +237,7 @@ static void recent_table_flush(struct recent_table *t) static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = dev_net(par->in ? par->in : par->out); + struct net *net = par->net; struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 43e26c881100..2ec08f04b816 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,7 +143,8 @@ static bool xt_socket_sk_is_transparent(struct sock *sk) } } -static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, +static struct sock *xt_socket_lookup_slow_v4(struct net *net, + const struct sk_buff *skb, const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); @@ -197,7 +198,7 @@ static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, } #endif - return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, + return xt_socket_get_sock_v4(net, protocol, saddr, daddr, sport, dport, indev); } @@ -209,7 +210,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v4(skb, par->in); + sk = xt_socket_lookup_slow_v4(par->net, skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -335,7 +336,8 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, return NULL; } -static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, +static struct sock *xt_socket_lookup_slow_v6(struct net *net, + const struct sk_buff *skb, const struct net_device *indev) { __be16 uninitialized_var(dport), uninitialized_var(sport); @@ -371,7 +373,7 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, return NULL; } - return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, + return xt_socket_get_sock_v6(net, tproto, saddr, daddr, sport, dport, indev); } @@ -383,7 +385,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk = skb->sk; if (!sk) - sk = xt_socket_lookup_slow_v6(skb, par->in); + sk = xt_socket_lookup_slow_v6(par->net, skb, par->in); if (sk) { bool wildcard; bool 
transparent = true; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index fafe33bdb619..59651af8cc27 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2116,7 +2116,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid consume_skb(info.skb2); if (info.delivered) { - if (info.congested && (allocation & __GFP_WAIT)) + if (info.congested && gfpflags_allow_blocking(allocation)) yield(); return 0; } diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2ed5f964772e..bc0e504f33a6 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -39,7 +39,7 @@ void genl_unlock(void) EXPORT_SYMBOL(genl_unlock); #ifdef CONFIG_LOCKDEP -int lockdep_genl_is_held(void) +bool lockdep_genl_is_held(void) { return lockdep_is_held(&genl_mutex); } @@ -1136,19 +1136,19 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb, } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct genl_family *family, - struct sk_buff *skb, struct net *net, u32 portid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) +void genl_notify(struct genl_family *family, struct sk_buff *skb, + struct genl_info *info, u32 group, gfp_t flags) { + struct net *net = genl_info_net(info); struct sock *sk = net->genl_sock; int report = 0; - if (nlh) - report = nlmsg_report(nlh); + if (info->nlhdr) + report = nlmsg_report(info->nlhdr); if (WARN_ON_ONCE(group >= family->n_mcgrps)) return; group = family->mcgrp_offset + group; - nlmsg_notify(sk, skb, portid, group, report, flags); + nlmsg_notify(sk, skb, info->snd_portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/nfc/core.c b/net/nfc/core.c index cff3f1614ad4..1fe3d3b362c0 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -449,7 +449,7 @@ error: * @dev: The nfc device that found the target * @target_idx: index of the target that must be deactivated */ -int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) +int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode) { int rc = 0; @@ -476,7 +476,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx) if (dev->ops->check_presence) del_timer_sync(&dev->check_pres_timer); - dev->ops->deactivate_target(dev, dev->active_target); + dev->ops->deactivate_target(dev, dev->active_target, mode); dev->active_target = NULL; error: diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index 009bcf317101..23c2a118ac9f 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -631,7 +631,8 @@ static int digital_activate_target(struct nfc_dev *nfc_dev, } static void digital_deactivate_target(struct nfc_dev *nfc_dev, - struct nfc_target *target) + struct nfc_target *target, + u8 mode) { struct nfc_digital_dev *ddev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6e061da2258a..2b0f0ac498d2 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -678,7 +678,8 @@ static int hci_activate_target(struct nfc_dev *nfc_dev, } static void hci_deactivate_target(struct nfc_dev *nfc_dev, - struct nfc_target *target) + struct nfc_target *target, + u8 mode) { } diff --git a/net/nfc/hci/llc.c b/net/nfc/hci/llc.c index 1b90c0531852..1399a03fa6e6 100644 --- a/net/nfc/hci/llc.c +++ b/net/nfc/hci/llc.c @@ -144,11 +144,13 @@ inline int nfc_llc_start(struct nfc_llc *llc) { return llc->ops->start(llc); } +EXPORT_SYMBOL(nfc_llc_start); inline int nfc_llc_stop(struct nfc_llc *llc) { return llc->ops->stop(llc); } +EXPORT_SYMBOL(nfc_llc_stop); inline 
void nfc_llc_rcv_from_drv(struct nfc_llc *llc, struct sk_buff *skb) { diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 901c1ddba841..85d4819ab657 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -12,7 +12,7 @@ config NFC_NCI config NFC_NCI_SPI depends on NFC_NCI && SPI select CRC_CCITT - bool "NCI over SPI protocol support" + tristate "NCI over SPI protocol support" default n help NCI (NFC Controller Interface) is a communication protocol between diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index b4b85b82e988..0ca31d9bf741 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_NFC_NCI) += nci.o nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o -nci-$(CONFIG_NFC_NCI_SPI) += spi.o +nci_spi-y += spi.o +obj-$(CONFIG_NFC_NCI_SPI) += nci_spi.o nci_uart-y += uart.o obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 943889b87a34..10c99a578421 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -64,6 +64,19 @@ struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, return NULL; } +int nci_get_conn_info_by_id(struct nci_dev *ndev, u8 id) +{ + struct nci_conn_info *conn_info; + + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->id == id) + return conn_info->conn_id; + } + + return -EINVAL; +} +EXPORT_SYMBOL(nci_get_conn_info_by_id); + /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) @@ -325,32 +338,46 @@ static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) sizeof(struct nci_rf_deactivate_cmd), &cmd); } -struct nci_prop_cmd_param { +struct nci_cmd_param { __u16 opcode; size_t len; __u8 *payload; }; -static void nci_prop_cmd_req(struct nci_dev *ndev, unsigned long opt) +static void nci_generic_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_prop_cmd_param *param = (struct nci_prop_cmd_param *)opt; + struct nci_cmd_param *param = + (struct nci_cmd_param *)opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, __u8 *payload) { - struct nci_prop_cmd_param param; + struct nci_cmd_param param; param.opcode = nci_opcode_pack(NCI_GID_PROPRIETARY, oid); param.len = len; param.payload = payload; - return __nci_request(ndev, nci_prop_cmd_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); +int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, __u8 *payload) +{ + struct nci_cmd_param param; + + param.opcode = opcode; + param.len = len; + param.payload = payload; + + return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_cmd); + int nci_core_reset(struct nci_dev *ndev) { return __nci_request(ndev, nci_reset_req, 0, @@ -402,9 +429,8 @@ static int nci_open_device(struct nci_dev *ndev) msecs_to_jiffies(NCI_INIT_TIMEOUT)); } - if (ndev->ops->post_setup) { + if (!rc && ndev->ops->post_setup) rc = ndev->ops->post_setup(ndev); - } if (!rc) { rc = __nci_request(ndev, nci_init_complete_req, 0, @@ -540,7 +566,7 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { - return nci_request(ndev, nci_nfcee_discover_req, action, + return __nci_request(ndev, nci_nfcee_discover_req, action, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } 
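The nci_nfcee_discover() hunk just above, like the nci_nfcee_mode_set() and nci_core_conn_close() hunks that follow, swaps nci_request() for __nci_request(). In the NCI core, nci_request() is the outer entry point that serializes whole command/response transactions behind the device's req_lock, while __nci_request() is the inner send-and-wait step. The likely motivation, stated here as an assumption rather than taken from the patch text, is that these helpers can be reached from a context that already owns the request mutex (for example a driver setup or post_setup callback run under nci_open_device()), where re-taking it would self-deadlock. A minimal userspace sketch of that locked/unlocked pairing; every name below is invented for illustration:

#include <pthread.h>

/* Illustrative model of an nci_request()/__nci_request()-style split.
 * None of these names are the kernel's. */
struct nci_model_dev {
	pthread_mutex_t req_lock;	/* serializes whole transactions */
	int up;				/* device is initialized */
};

typedef void (*nci_model_req_fn)(struct nci_model_dev *dev, unsigned long opt);

/* Inner variant: issues the command and waits for its completion.
 * Callers must already be inside a serialized request context. */
static int nci_model_request_locked(struct nci_model_dev *dev,
				    nci_model_req_fn req, unsigned long opt)
{
	req(dev, opt);
	/* ... wait for the response or a timeout here ... */
	return 0;
}

/* Outer variant: checks device state and holds req_lock across the
 * whole transaction. Calling this from code already running under
 * req_lock (e.g. a setup callback) would deadlock, which is why such
 * callers use the inner variant directly. */
static int nci_model_request(struct nci_model_dev *dev,
			     nci_model_req_fn req, unsigned long opt)
{
	int rc;

	if (!dev->up)
		return -1;	/* stand-in for -ENETDOWN */
	pthread_mutex_lock(&dev->req_lock);
	rc = nci_model_request_locked(dev, req, opt);
	pthread_mutex_unlock(&dev->req_lock);
	return rc;
}
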
EXPORT_SYMBOL(nci_nfcee_discover); @@ -561,8 +587,9 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; - return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, - msecs_to_jiffies(NCI_CMD_TIMEOUT)); + return __nci_request(ndev, nci_nfcee_mode_set_req, + (unsigned long)&cmd, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); @@ -588,12 +615,19 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, if (!cmd) return -ENOMEM; + if (!number_destination_params) + return -EINVAL; + cmd->destination_type = destination_type; cmd->number_destination_params = number_destination_params; memcpy(cmd->params, params, params_len); data.cmd = cmd; - ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + + if (params->length > 0) + ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + else + ndev->cur_id = 0; r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data, @@ -612,8 +646,8 @@ static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { - return nci_request(ndev, nci_core_conn_close_req, conn_id, - msecs_to_jiffies(NCI_CMD_TIMEOUT)); + return __nci_request(ndev, nci_core_conn_close_req, conn_id, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); @@ -801,9 +835,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, } static void nci_deactivate_target(struct nfc_dev *nfc_dev, - struct nfc_target *target) + struct nfc_target *target, + __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; pr_debug("entry\n"); @@ -814,9 +850,14 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, ndev->target_active_prot = 0; + switch (mode) { + case NFC_TARGET_MODE_SLEEP: + nci_mode = NCI_DEACTIVATE_TYPE_SLEEP_MODE; + break; + } + if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { - nci_request(ndev, nci_rf_deactivate_req, - NCI_DEACTIVATE_TYPE_IDLE_MODE, + nci_request(ndev, nci_rf_deactivate_req, nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } @@ -850,7 +891,7 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) pr_debug("entry\n"); if (nfc_dev->rf_mode == NFC_RF_INITIATOR) { - nci_deactivate_target(nfc_dev, NULL); + nci_deactivate_target(nfc_dev, NULL, NCI_DEACTIVATE_TYPE_IDLE_MODE); } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { @@ -1177,7 +1218,7 @@ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) } EXPORT_SYMBOL(nci_recv_frame); -static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) +int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { pr_debug("len %d\n", skb->len); @@ -1195,6 +1236,7 @@ static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) return ndev->ops->send(ndev, skb); } +EXPORT_SYMBOL(nci_send_frame); /* Send NCI command */ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) @@ -1226,48 +1268,80 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) return 0; } +EXPORT_SYMBOL(nci_send_cmd); /* Proprietary commands API */ -static struct nci_prop_ops *prop_cmd_lookup(struct nci_dev *ndev, - __u16 opcode) +static struct nci_driver_ops *ops_cmd_lookup(struct nci_driver_ops *ops, + size_t n_ops, + __u16 opcode) { size_t i; - struct nci_prop_ops *prop_op; + struct nci_driver_ops *op; - if 
(!ndev->ops->prop_ops || !ndev->ops->n_prop_ops) + if (!ops || !n_ops) return NULL; - for (i = 0; i < ndev->ops->n_prop_ops; i++) { - prop_op = &ndev->ops->prop_ops[i]; - if (prop_op->opcode == opcode) - return prop_op; + for (i = 0; i < n_ops; i++) { + op = &ops[i]; + if (op->opcode == opcode) + return op; } return NULL; } -int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, - struct sk_buff *skb) +static int nci_op_rsp_packet(struct nci_dev *ndev, __u16 rsp_opcode, + struct sk_buff *skb, struct nci_driver_ops *ops, + size_t n_ops) { - struct nci_prop_ops *prop_op; + struct nci_driver_ops *op; - prop_op = prop_cmd_lookup(ndev, rsp_opcode); - if (!prop_op || !prop_op->rsp) + op = ops_cmd_lookup(ops, n_ops, rsp_opcode); + if (!op || !op->rsp) return -ENOTSUPP; - return prop_op->rsp(ndev, skb); + return op->rsp(ndev, skb); } -int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, - struct sk_buff *skb) +static int nci_op_ntf_packet(struct nci_dev *ndev, __u16 ntf_opcode, + struct sk_buff *skb, struct nci_driver_ops *ops, + size_t n_ops) { - struct nci_prop_ops *prop_op; + struct nci_driver_ops *op; - prop_op = prop_cmd_lookup(ndev, ntf_opcode); - if (!prop_op || !prop_op->ntf) + op = ops_cmd_lookup(ops, n_ops, ntf_opcode); + if (!op || !op->ntf) return -ENOTSUPP; - return prop_op->ntf(ndev, skb); + return op->ntf(ndev, skb); +} + +int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->prop_ops, + ndev->ops->n_prop_ops); +} + +int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->prop_ops, + ndev->ops->n_prop_ops); +} + +int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_rsp_packet(ndev, opcode, skb, ndev->ops->core_ops, + ndev->ops->n_core_ops); +} + +int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, + struct sk_buff *skb) +{ + return nci_op_ntf_packet(ndev, opcode, skb, ndev->ops->core_ops, + ndev->ops->n_core_ops); } /* ---- NCI TX Data worker thread ---- */ diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 566466d90048..dbd24254412a 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -90,6 +90,18 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, nci_pbf_set((__u8 *)hdr, pbf); } +int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id) +{ + struct nci_conn_info *conn_info; + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) + return -EPROTO; + + return conn_info->max_pkt_payload_len; +} +EXPORT_SYMBOL(nci_conn_max_data_pkt_payload_size); + static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { @@ -203,6 +215,7 @@ free_exit: exit: return rc; } +EXPORT_SYMBOL(nci_send_data); /* ----------------- NCI RX Data ----------------- */ diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 609f92283d1b..2aedac15cb59 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -70,6 +70,7 @@ struct nci_hcp_packet { #define NCI_HCI_ANY_SET_PARAMETER 0x01 #define NCI_HCI_ANY_GET_PARAMETER 0x02 #define NCI_HCI_ANY_CLOSE_PIPE 0x04 +#define NCI_HCI_ADM_CLEAR_ALL_PIPE 0x14 #define NCI_HFP_NO_CHAINING 0x80 @@ -78,6 +79,8 @@ struct nci_hcp_packet { #define NCI_EVT_HOT_PLUG 0x03 #define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 +#define NCI_HCI_ADM_CREATE_PIPE 0x10 +#define NCI_HCI_ADM_DELETE_PIPE 0x11 /* HCP headers */ #define 
NCI_HCI_HCP_PACKET_HEADER_LEN 1 @@ -101,6 +104,20 @@ struct nci_hcp_packet { #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) +static int nci_hci_result_to_errno(u8 result) +{ + switch (result) { + case NCI_HCI_ANY_OK: + return 0; + case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: + return -EOPNOTSUPP; + case NCI_HCI_ANY_E_TIMEOUT: + return -ETIME; + default: + return -1; + } +} + /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { @@ -146,18 +163,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (!conn_info) return -EPROTO; - skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + i = 0; + skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *skb_push(skb, 1) = data_type; - i = 0; - len = conn_info->max_pkt_payload_len; - do { + len = conn_info->max_pkt_payload_len; + /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { @@ -177,9 +194,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return r; i += len; + if (i < data_len) { - skb_trim(skb, 0); - skb_pull(skb, len); + skb = nci_skb_alloc(ndev, + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); @@ -212,7 +235,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_conn_info *conn_info; + struct nci_hcp_message *message; + struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -232,14 +256,34 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - - if (r == NCI_STATUS_OK && skb) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } EXPORT_SYMBOL(nci_hci_send_cmd); +int nci_hci_clear_all_pipes(struct nci_dev *ndev) +{ + int r; + + r = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADM_CLEAR_ALL_PIPE, NULL, 0, NULL); + if (r < 0) + return r; + + nci_hci_reset_pipes(ndev->hci_dev); + return r; +} +EXPORT_SYMBOL(nci_hci_clear_all_pipes); + static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb) { @@ -328,9 +372,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct nci_conn_info *conn_info; u8 status = result; - if (result != NCI_HCI_ANY_OK) - goto exit; - conn_info = ndev->hci_dev->conn_info; if (!conn_info) { status = NCI_STATUS_REJECTED; @@ -340,7 +381,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, conn_info->rx_skb = skb; exit: - nci_req_complete(ndev, status); + nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. 
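The hunks below repeatedly replace open-coded masking with the HCP accessor macros defined earlier in this file: the one-byte packet header carries the chaining flag in bit 7 (NCI_HFP_NO_CHAINING is 0x80) and the pipe id in the low 7 bits (mask 0x7f), and the message header keeps the instruction in the low 6 bits (mask 0x3f) with the message type above it. A small sketch of that layout; the helper names are invented, and the 2-bit type field is an assumption inferred from the 0x3f mask:

#include <stdint.h>

/* Sketch of the HCP header layout; masks mirror the NCI_HCP_* and
 * NCI_HFP_* macros in this file, helper names are made up. */
#define HCP_PKT_CHAINING	0x80	/* NCI_HFP_NO_CHAINING */
#define HCP_PKT_PIPE_MASK	0x7f	/* NCI_HCP_MSG_GET_PIPE() */
#define HCP_MSG_INSTR_MASK	0x3f	/* NCI_HCP_MSG_GET_CMD() */
#define HCP_MSG_TYPE_SHIFT	6	/* assumed: type above the 6 instruction bits */

static inline uint8_t hcp_pkt_header(int last_fragment, uint8_t pipe)
{
	return (last_fragment ? HCP_PKT_CHAINING : 0) |
	       (pipe & HCP_PKT_PIPE_MASK);
}

static inline uint8_t hcp_pkt_pipe(uint8_t pkt_header)
{
	/* Low 7 bits only; the hunks below switch several sites that
	 * masked with the wrong constant (or not at all) to this. */
	return pkt_header & HCP_PKT_PIPE_MASK;
}

static inline uint8_t hcp_msg_type(uint8_t msg_header)
{
	return msg_header >> HCP_MSG_TYPE_SHIFT;
}

static inline uint8_t hcp_msg_instruction(uint8_t msg_header)
{
	return msg_header & HCP_MSG_INSTR_MASK;
}
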
@@ -366,7 +407,7 @@ static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, break; } - nci_req_complete(ndev, 0); + nci_req_complete(ndev, NCI_STATUS_OK); } static void nci_hci_msg_rx_work(struct work_struct *work) @@ -378,7 +419,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { - pipe = skb->data[0]; + pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); @@ -395,7 +436,7 @@ void nci_hci_data_received_cb(void *context, { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; - u8 pipe, type, instruction; + u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; @@ -415,7 +456,7 @@ void nci_hci_data_received_cb(void *context, /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { - pipe = packet->header & NCI_HCI_FRAGMENT; + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; @@ -434,7 +475,7 @@ void nci_hci_data_received_cb(void *context, *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } @@ -452,11 +493,10 @@ void nci_hci_data_received_cb(void *context, packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + - NCI_HCI_HCP_MESSAGE_HEADER_LEN); - nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, + NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); @@ -485,9 +525,47 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) } EXPORT_SYMBOL(nci_hci_open_pipe); +static u8 nci_hci_create_pipe(struct nci_dev *ndev, u8 dest_host, + u8 dest_gate, int *result) +{ + u8 pipe; + struct sk_buff *skb; + struct nci_hci_create_pipe_params params; + struct nci_hci_create_pipe_resp *resp; + + pr_debug("gate=%d\n", dest_gate); + + params.src_gate = NCI_HCI_ADMIN_GATE; + params.dest_host = dest_host; + params.dest_gate = dest_gate; + + *result = nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADM_CREATE_PIPE, + (u8 *)¶ms, sizeof(params), &skb); + if (*result < 0) + return NCI_HCI_INVALID_PIPE; + + resp = (struct nci_hci_create_pipe_resp *)skb->data; + pipe = resp->pipe; + kfree_skb(skb); + + pr_debug("pipe created=%d\n", pipe); + + return pipe; +} + +static int nci_hci_delete_pipe(struct nci_dev *ndev, u8 pipe) +{ + pr_debug("\n"); + + return nci_hci_send_cmd(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADM_DELETE_PIPE, &pipe, 1, NULL); +} + int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -520,6 +598,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, 
u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + } kfree(tmp); return r; @@ -529,6 +613,7 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -553,8 +638,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } @@ -563,6 +655,7 @@ EXPORT_SYMBOL(nci_hci_get_param); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe) { + bool pipe_created = false; int r; if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) @@ -581,12 +674,26 @@ int nci_hci_connect_gate(struct nci_dev *ndev, case NCI_HCI_ADMIN_GATE: pipe = NCI_HCI_ADMIN_PIPE; break; + default: + pipe = nci_hci_create_pipe(ndev, dest_host, dest_gate, &r); + if (pipe < 0) + return r; + pipe_created = true; + break; } open_pipe: r = nci_hci_open_pipe(ndev, pipe); - if (r < 0) + if (r < 0) { + if (pipe_created) { + if (nci_hci_delete_pipe(ndev, pipe) < 0) { + /* TODO: Cannot clean by deleting pipe... + * -> inconsistent state + */ + } + } return r; + } ndev->hci_dev->pipes[pipe].gate = dest_gate; ndev->hci_dev->pipes[pipe].host = dest_host; @@ -653,6 +760,10 @@ int nci_hci_dev_session_init(struct nci_dev *ndev) /* Restore gate<->pipe table from some proprietary location. 
*/ r = ndev->ops->hci_load_session(ndev); } else { + r = nci_hci_clear_all_pipes(ndev); + if (r < 0) + goto exit; + r = nci_hci_dev_connect_gates(ndev, ndev->hci_dev->init_data.gate_count, ndev->hci_dev->init_data.gates); diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 5d1c2e391c56..2ada2b39e355 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -759,7 +759,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) skb_pull(skb, NCI_CTRL_HDR_SIZE); if (nci_opcode_gid(ntf_opcode) == NCI_GID_PROPRIETARY) { - if (nci_prop_ntf_packet(ndev, ntf_opcode, skb)) { + if (nci_prop_ntf_packet(ndev, ntf_opcode, skb) == -ENOTSUPP) { pr_err("unsupported ntf opcode 0x%x\n", ntf_opcode); } @@ -805,6 +805,7 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) break; } + nci_core_ntf_packet(ndev, ntf_opcode, skb); end: kfree_skb(skb); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 408bd8f857ab..9b6eb913d801 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -355,6 +355,7 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) break; } + nci_core_rsp_packet(ndev, rsp_opcode, skb); end: kfree_skb(skb); diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c index ec250e77763a..d904cd2f1442 100644 --- a/net/nfc/nci/spi.c +++ b/net/nfc/nci/spi.c @@ -18,6 +18,8 @@ #define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__ +#include <linux/module.h> + #include <linux/export.h> #include <linux/spi/spi.h> #include <linux/crc-ccitt.h> @@ -56,6 +58,7 @@ static int __nci_spi_send(struct nci_spi *nspi, struct sk_buff *skb, } t.cs_change = cs_change; t.delay_usecs = nspi->xfer_udelay; + t.speed_hz = nspi->xfer_speed_hz; spi_message_init(&m); spi_message_add_tail(&t, &m); @@ -142,7 +145,8 @@ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi, nspi->acknowledge_mode = acknowledge_mode; nspi->xfer_udelay = delay; - + /* Use controller max SPI speed by default */ + nspi->xfer_speed_hz = 0; nspi->spi = spi; nspi->ndev = ndev; init_completion(&nspi->req_completion); @@ -195,12 +199,14 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi) tx.tx_buf = req; tx.len = 2; tx.cs_change = 0; + tx.speed_hz = nspi->xfer_speed_hz; spi_message_add_tail(&tx, &m); memset(&rx, 0, sizeof(struct spi_transfer)); rx.rx_buf = resp_hdr; rx.len = 2; rx.cs_change = 1; + rx.speed_hz = nspi->xfer_speed_hz; spi_message_add_tail(&rx, &m); ret = spi_sync(nspi->spi, &m); @@ -224,6 +230,7 @@ static struct sk_buff *__nci_spi_read(struct nci_spi *nspi) rx.len = rx_len; rx.cs_change = 0; rx.delay_usecs = nspi->xfer_udelay; + rx.speed_hz = nspi->xfer_speed_hz; spi_message_add_tail(&rx, &m); ret = spi_sync(nspi->spi, &m); @@ -320,3 +327,5 @@ done: return skb; } EXPORT_SYMBOL_GPL(nci_spi_read); + +MODULE_LICENSE("GPL"); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 853172c27f68..f58c1fba1026 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -885,7 +885,7 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); protocol = nla_get_u32(info->attrs[NFC_ATTR_PROTOCOLS]); - nfc_deactivate_target(dev, target_idx); + nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); rc = nfc_activate_target(dev, target_idx, protocol); nfc_put_device(dev); @@ -1109,10 +1109,8 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); dev = nfc_get_device(idx); - if (!dev) { - rc = -ENODEV; - goto exit; - } + if (!dev) + return 
-ENODEV; device_lock(&dev->dev); diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 5c93e8412a26..c20b784ad720 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -25,6 +25,9 @@ #include <net/nfc/nfc.h> #include <net/sock.h> +#define NFC_TARGET_MODE_IDLE 0 +#define NFC_TARGET_MODE_SLEEP 1 + struct nfc_protocol { int id; struct proto *proto; @@ -147,7 +150,7 @@ int nfc_dep_link_down(struct nfc_dev *dev); int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol); -int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); +int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index e9a91488fe3d..e386e6c90b17 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -321,7 +321,8 @@ static void rawsock_destruct(struct sock *sk) if (sk->sk_state == TCP_ESTABLISHED) { nfc_deactivate_target(nfc_rawsock(sk)->dev, - nfc_rawsock(sk)->target_idx); + nfc_rawsock(sk)->target_idx, + NFC_TARGET_MODE_IDLE); nfc_put_device(nfc_rawsock(sk)->dev); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index dba635d086b2..c88d0f2d3e01 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } -static int ovs_vport_output(struct sock *sock, struct sk_buff *skb) +static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage); struct vport *vport = data->vport; @@ -679,8 +679,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb) skb_pull(skb, hlen); } -static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, - __be16 ethertype) +static void ovs_fragment(struct net *net, struct vport *vport, + struct sk_buff *skb, u16 mru, __be16 ethertype) { if (skb_network_offset(skb) > MAX_L2_LEN) { OVS_NLERR(1, "L2 header too long to fragment"); @@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, skb_dst_set_noref(skb, &ovs_dst); IPCB(skb)->frag_max_size = mru; - ip_do_fragment(skb->sk, skb, ovs_vport_output); + ip_do_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else if (ethertype == htons(ETH_P_IPV6)) { const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); @@ -721,7 +721,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru, skb_dst_set_noref(skb, &ovs_rt.dst); IP6CB(skb)->frag_max_size = mru; - v6ops->fragment(skb->sk, skb, ovs_vport_output); + v6ops->fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", @@ -746,6 +746,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, if (likely(!mru || (skb->len <= mru + ETH_HLEN))) { ovs_vport_send(vport, skb); } else if (mru <= vport->dev->mtu) { + struct net *net = read_pnet(&dp->net); __be16 ethertype = key->eth.type; if (!is_flow_key_valid(key)) { @@ -755,7 +756,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port, ethertype = vlan_get_protocol(skb); } - ovs_fragment(vport, skb, mru, ethertype); + ovs_fragment(net, vport, skb, mru, ethertype); } else { kfree_skb(skb); } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 50095820edb7..c2cc11168fd5 100644 --- 
a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -306,7 +306,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, int err; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - err = ip_defrag(skb, user); + err = ip_defrag(net, skb, user); if (err) return err; @@ -317,7 +317,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key, struct sk_buff *reasm; memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); - reasm = nf_ct_frag6_gather(skb, user); + reasm = nf_ct_frag6_gather(net, skb, user); if (!reasm) return -EINPROGRESS; @@ -357,7 +357,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone, { struct nf_conntrack_tuple tuple; - if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple)) + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple)) return NULL; return __nf_ct_expect_find(net, zone, &tuple); } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index c5d08ee37730..91a8b004dc51 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -91,8 +91,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info, static void ovs_notify(struct genl_family *family, struct sk_buff *skb, struct genl_info *info) { - genl_notify(family, skb, genl_info_net(info), info->snd_portid, - 0, info->nlhdr, GFP_KERNEL); + genl_notify(family, skb, info, 0, GFP_KERNEL); } /** @@ -1176,7 +1175,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) info, OVS_FLOW_CMD_NEW, false, ufid_flags); - if (unlikely(IS_ERR(reply))) { + if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_unlock_ovs; } diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index c8db44ab2ee7..0ea128eeeab2 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -698,8 +698,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, { /* Extract metadata from packet. */ if (tun_info) { - if (ip_tunnel_info_af(tun_info) != AF_INET) - return -EINVAL; + key->tun_proto = ip_tunnel_info_af(tun_info); memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key)); if (tun_info->options_len) { @@ -714,6 +713,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, key->tun_opts_len = 0; } } else { + key->tun_proto = 0; key->tun_opts_len = 0; memset(&key->tun_key, 0, sizeof(key->tun_key)); } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 8cfa15a08668..1d055c559eaf 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -63,6 +63,7 @@ struct sw_flow_key { u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ } __packed phy; /* Safe when right after 'tun_key'. */ + u8 tun_proto; /* Protocol of encapsulating tunnel. */ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. */ struct { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 38536c137c54..907d6fd28ede 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -262,8 +262,8 @@ size_t ovs_tun_key_attr_size(void) * updating this function. 
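The sizing body that follows widens the per-address reservation from nla_total_size(4) to nla_total_size(16), so one buffer budget covers either address family. As a hedged standalone illustration of how those netlink sizes add up (the macros below are userspace stand-ins for the kernel helpers, not the kernel's own):

#include <stdio.h>

/* Userspace model of the kernel's netlink sizing: each attribute is a
 * 4-byte header plus payload, padded up to a 4-byte boundary. */
#define NLA_HDRLEN 4
#define NLA_ALIGN(len) (((len) + 3) & ~3)
#define NLA_TOTAL_SIZE(payload) NLA_ALIGN(NLA_HDRLEN + (payload))

int main(void)
{
	/* An IPv4 address attribute occupies 8 bytes on the wire... */
	printf("v4 addr attr: %d bytes\n", NLA_TOTAL_SIZE(4));
	/* ...an IPv6 address occupies 20, so reserving for 16-byte
	 * payloads per SRC/DST slot covers whichever family is used. */
	printf("v6 addr attr: %d bytes\n", NLA_TOTAL_SIZE(16));
	return 0;
}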
*/ return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ + + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ @@ -323,6 +323,8 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, .next = ovs_vxlan_ext_key_lens }, + [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -542,15 +544,15 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } -static int ipv4_tun_from_nlattr(const struct nlattr *attr, - struct sw_flow_match *match, bool is_mask, - bool log) +static int ip_tun_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool log) { - struct nlattr *a; - int rem; - bool ttl = false; + bool ttl = false, ipv4 = false, ipv6 = false; __be16 tun_flags = 0; int opts_type = 0; + struct nlattr *a; + int rem; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -578,10 +580,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src, nla_get_in_addr(a), is_mask); + ipv4 = true; break; case OVS_TUNNEL_KEY_ATTR_IPV4_DST: SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst, nla_get_in_addr(a), is_mask); + ipv4 = true; + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src, + nla_get_in6_addr(a), is_mask); + ipv6 = true; + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_DST: + SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst, + nla_get_in6_addr(a), is_mask); + ipv6 = true; break; case OVS_TUNNEL_KEY_ATTR_TOS: SW_FLOW_KEY_PUT(match, tun_key.tos, @@ -636,28 +650,46 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, opts_type = type; break; default: - OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", + OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); return -EINVAL; } } SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask); + if (is_mask) + SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true); + else + SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ?
AF_INET6 : AF_INET, + false); if (rem > 0) { - OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.", + OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.", rem); return -EINVAL; } + if (ipv4 && ipv6) { + OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes"); + return -EINVAL; + } + if (!is_mask) { - if (!match->key->tun_key.u.ipv4.dst) { + if (!ipv4 && !ipv6) { + OVS_NLERR(log, "IP tunnel dst address not specified"); + return -EINVAL; + } + if (ipv4 && !match->key->tun_key.u.ipv4.dst) { OVS_NLERR(log, "IPv4 tunnel dst address is zero"); return -EINVAL; } + if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) { + OVS_NLERR(log, "IPv6 tunnel dst address is zero"); + return -EINVAL; + } if (!ttl) { - OVS_NLERR(log, "IPv4 tunnel TTL not specified."); + OVS_NLERR(log, "IP tunnel TTL not specified."); return -EINVAL; } } @@ -682,21 +714,36 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb, return 0; } -static int __ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ip_tunnel_key *output, - const void *tun_opts, int swkey_tun_opts_len) +static int __ip_tun_to_nlattr(struct sk_buff *skb, + const struct ip_tunnel_key *output, + const void *tun_opts, int swkey_tun_opts_len, + unsigned short tun_proto) { if (output->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; - if (output->u.ipv4.src && - nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, - output->u.ipv4.src)) - return -EMSGSIZE; - if (output->u.ipv4.dst && - nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, - output->u.ipv4.dst)) - return -EMSGSIZE; + switch (tun_proto) { + case AF_INET: + if (output->u.ipv4.src && + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + output->u.ipv4.src)) + return -EMSGSIZE; + if (output->u.ipv4.dst && + nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + output->u.ipv4.dst)) + return -EMSGSIZE; + break; + case AF_INET6: + if (!ipv6_addr_any(&output->u.ipv6.src) && + nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, + &output->u.ipv6.src)) + return -EMSGSIZE; + if (!ipv6_addr_any(&output->u.ipv6.dst) && + nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST, + &output->u.ipv6.dst)) + return -EMSGSIZE; + break; + } if (output->tos && nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos)) return -EMSGSIZE; @@ -730,9 +777,10 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, return 0; } -static int ipv4_tun_to_nlattr(struct sk_buff *skb, - const struct ip_tunnel_key *output, - const void *tun_opts, int swkey_tun_opts_len) +static int ip_tun_to_nlattr(struct sk_buff *skb, + const struct ip_tunnel_key *output, + const void *tun_opts, int swkey_tun_opts_len, + unsigned short tun_proto) { struct nlattr *nla; int err; @@ -741,7 +789,8 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, if (!nla) return -EMSGSIZE; - err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len); + err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len, + tun_proto); if (err) return err; @@ -752,9 +801,10 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb, int ovs_nla_put_tunnel_info(struct sk_buff *skb, struct ip_tunnel_info *tun_info) { - return __ipv4_tun_to_nlattr(skb, &tun_info->key, - ip_tunnel_info_opts(tun_info), - tun_info->options_len); + return __ip_tun_to_nlattr(skb, &tun_info->key, + ip_tunnel_info_opts(tun_info), + tun_info->options_len, + ip_tunnel_info_af(tun_info)); } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, @@ -805,8 +855,8 @@ static int metadata_from_nlattrs(struct 
net *net, struct sw_flow_match *match, *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { - if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log) < 0) + if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -1202,7 +1252,7 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, /* The userspace does not send tunnel attributes that * are 0, but we should not wildcard them nonetheless. */ - if (match->key->tun_key.u.ipv4.dst) + if (match->key->tun_proto) SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); @@ -1375,14 +1425,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; - if ((swkey->tun_key.u.ipv4.dst || is_mask)) { + if ((swkey->tun_proto || is_mask)) { const void *opts = NULL; if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); - if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, - swkey->tun_opts_len)) + if (ip_tun_to_nlattr(skb, &output->tun_key, opts, + swkey->tun_opts_len, swkey->tun_proto)) goto nla_put_failure; } @@ -1885,7 +1935,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, int err = 0, start, opts_type; ovs_match_init(&match, &key, NULL); - opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) return opts_type; @@ -1921,6 +1971,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, tun_info = &tun_dst->u.tun_info; tun_info->mode = IP_TUNNEL_INFO_TX; + if (key.tun_proto == AF_INET6) + tun_info->mode |= IP_TUNNEL_INFO_IPV6; tun_info->key = key.tun_key; /* We need to store the options in the action itself since diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index c7f74aab34b9..d073fff82fdb 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -428,7 +428,7 @@ static u32 flow_hash(const struct sw_flow_key *key, static int flow_key_start(const struct sw_flow_key *key) { - if (key->tun_key.u.ipv4.dst) + if (key->tun_proto) return 0; else return rounddown(offsetof(struct sw_flow_key, phy), diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 5f8aaaaa0785..efb736bb6855 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -116,7 +116,7 @@ static struct vport_ops ovs_geneve_vport_ops = { .create = geneve_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = geneve_get_options, - .send = ovs_netdev_send, + .send = dev_queue_xmit, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 64225bf5eb40..c3257d78d3d2 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -87,7 +87,7 @@ static struct vport *gre_create(const struct vport_parms *parms) static struct vport_ops ovs_gre_vport_ops = { .type = OVS_VPORT_TYPE_GRE, .create = gre_create, - .send = ovs_netdev_send, + .send = dev_queue_xmit, .destroy = ovs_netdev_tunnel_destroy, .owner = THIS_MODULE, }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index b3934126daa8..ec76398a792f 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -242,22 +242,21 @@ static void internal_dev_destroy(struct vport *vport) rtnl_unlock(); } 
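The flow.c, flow.h and flow_table.c hunks above replace the old "nonzero IPv4 dst means a tunnel is present" test with an explicit tun_proto address family (0 when there is no tunnel), which also lets flow_key_start() decide whether the flow hash must cover the tunnel region. A minimal userspace sketch of both decisions; the struct layout is illustrative, not the kernel's sw_flow_key:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>		/* AF_INET, AF_INET6 */

/* Illustrative stand-in for the tunnel-carrying front of the flow key. */
struct flow_key_sketch {
	uint8_t  tun_key[48];	/* tunnel metadata sits at offset 0 */
	uint8_t  tun_proto;	/* 0 = no tunnel, else AF_INET/AF_INET6 */
	uint32_t phy;		/* first non-tunnel match field */
};

#define ROUNDDOWN(x, a)	((x) / (a) * (a))

/* Like the patched flow_key_start(): include the tunnel region in the
 * hash only when tunnel metadata is actually valid. */
static size_t key_start(const struct flow_key_sketch *k)
{
	if (k->tun_proto)
		return 0;
	return ROUNDDOWN(offsetof(struct flow_key_sketch, phy),
			 sizeof(long));
}

int main(void)
{
	struct flow_key_sketch k = { .tun_proto = 0 };

	/* With no tunnel, hashing skips the (zeroed) tunnel region. */
	printf("no tunnel: start %zu\n", key_start(&k));

	/* An IPv6 tunnel whose addresses happen to be all-zero would
	 * fool the old u.ipv4.dst != 0 test; tun_proto cannot be. */
	k.tun_proto = AF_INET6;
	assert(key_start(&k) == 0);
	printf("v6 tunnel: start %zu\n", key_start(&k));
	return 0;
}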
-static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) +static netdev_tx_t internal_dev_recv(struct sk_buff *skb) { - struct net_device *netdev = vport->dev; + struct net_device *netdev = skb->dev; struct pcpu_sw_netstats *stats; if (unlikely(!(netdev->flags & IFF_UP))) { kfree_skb(skb); netdev->stats.rx_dropped++; - return; + return NETDEV_TX_OK; } skb_dst_drop(skb); nf_reset(skb); secpath_reset(skb); - skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -269,6 +268,7 @@ static void internal_dev_recv(struct vport *vport, struct sk_buff *skb) u64_stats_update_end(&stats->syncp); netif_rx(skb); + return NETDEV_TX_OK; } static struct vport_ops ovs_internal_vport_ops = { diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index f7e8dcce7ada..b327368a3848 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -190,37 +190,6 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) } EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy); -static unsigned int packet_length(const struct sk_buff *skb) -{ - unsigned int length = skb->len - ETH_HLEN; - - if (skb->protocol == htons(ETH_P_8021Q)) - length -= VLAN_HLEN; - - return length; -} - -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb) -{ - int mtu = vport->dev->mtu; - - if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { - net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", - vport->dev->name, - packet_length(skb), mtu); - vport->dev->stats.tx_errors++; - goto drop; - } - - skb->dev = vport->dev; - dev_queue_xmit(skb); - return; - -drop: - kfree_skb(skb); -} -EXPORT_SYMBOL_GPL(ovs_netdev_send); - /* Returns null if this device is not attached to a datapath. 
*/ struct vport *ovs_netdev_get_vport(struct net_device *dev) { @@ -235,7 +204,7 @@ static struct vport_ops ovs_netdev_vport_ops = { .type = OVS_VPORT_TYPE_NETDEV, .create = netdev_create, .destroy = netdev_destroy, - .send = ovs_netdev_send, + .send = dev_queue_xmit, }; int __init ovs_netdev_init(void) diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index bf22fcedbc69..19e29c12adcc 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -27,7 +27,6 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev); struct vport *ovs_netdev_link(struct vport *vport, const char *name); -void ovs_netdev_send(struct vport *vport, struct sk_buff *skb); void ovs_netdev_detach_dev(struct vport *); int __init ovs_netdev_init(void); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index e1c9c0888037..1605691d9414 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -151,7 +151,7 @@ static struct vport_ops ovs_vxlan_netdev_vport_ops = { .create = vxlan_create, .destroy = ovs_netdev_tunnel_destroy, .get_options = vxlan_get_options, - .send = ovs_netdev_send, + .send = dev_queue_xmit, }; static int __init ovs_vxlan_tnl_init(void) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 320c765ce44a..0ac0fd004d7e 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -479,3 +479,33 @@ void ovs_vport_deferred_free(struct vport *vport) call_rcu(&vport->rcu, free_vport_rcu); } EXPORT_SYMBOL_GPL(ovs_vport_deferred_free); + +static unsigned int packet_length(const struct sk_buff *skb) +{ + unsigned int length = skb->len - ETH_HLEN; + + if (skb->protocol == htons(ETH_P_8021Q)) + length -= VLAN_HLEN; + + return length; +} + +void ovs_vport_send(struct vport *vport, struct sk_buff *skb) +{ + int mtu = vport->dev->mtu; + + if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) { + net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", + vport->dev->name, + packet_length(skb), mtu); + vport->dev->stats.tx_errors++; + goto drop; + } + + skb->dev = vport->dev; + vport->ops->send(skb); + return; + +drop: + kfree_skb(skb); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index d341ad6f3afe..bdfd82a7c064 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -140,7 +140,7 @@ struct vport_ops { int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); - void (*send)(struct vport *, struct sk_buff *); + netdev_tx_t (*send) (struct sk_buff *skb); struct module *owner; struct list_head list; }; @@ -199,9 +199,25 @@ static inline const char *ovs_vport_name(struct vport *vport) int ovs_vport_ops_register(struct vport_ops *ops); void ovs_vport_ops_unregister(struct vport_ops *ops); -static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb) +static inline struct rtable *ovs_tunnel_route_lookup(struct net *net, + const struct ip_tunnel_key *key, + u32 mark, + struct flowi4 *fl, + u8 protocol) { - vport->ops->send(vport, skb); + struct rtable *rt; + + memset(fl, 0, sizeof(*fl)); + fl->daddr = key->u.ipv4.dst; + fl->saddr = key->u.ipv4.src; + fl->flowi4_tos = RT_TOS(key->tos); + fl->flowi4_mark = mark; + fl->flowi4_proto = protocol; + + rt = ip_route_output_key(net, fl); + return rt; } +void ovs_vport_send(struct vport *vport, struct sk_buff *skb); + #endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index aa4b15c35884..af399cac5205 100644 --- 
a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1423,7 +1423,7 @@ static unsigned int fanout_demux_bpf(struct packet_fanout *f, rcu_read_lock(); prog = rcu_dereference(f->bpf_prog); if (prog) - ret = BPF_PROG_RUN(prog, skb) % num; + ret = bpf_prog_run_clear_cb(prog, skb) % num; rcu_read_unlock(); return ret; @@ -1439,17 +1439,17 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, { struct packet_fanout *f = pt->af_packet_priv; unsigned int num = READ_ONCE(f->num_members); + struct net *net = read_pnet(&f->net); struct packet_sock *po; unsigned int idx; - if (!net_eq(dev_net(dev), read_pnet(&f->net)) || - !num) { + if (!net_eq(dev_net(dev), net) || !num) { kfree_skb(skb); return 0; } if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { - skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET); + skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET); if (!skb) return 0; } @@ -1519,10 +1519,10 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) static bool match_fanout_group(struct packet_type *ptype, struct sock *sk) { - if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout) - return true; + if (sk->sk_family != PF_PACKET) + return false; - return false; + return ptype->af_packet_priv == pkt_sk(sk)->fanout; } static void fanout_init_data(struct packet_fanout *f) @@ -1567,7 +1567,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data, if (copy_from_user(&fprog, data, len)) return -EFAULT; - ret = bpf_prog_create_from_user(&new, &fprog, NULL); + ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); if (ret) return ret; @@ -1939,16 +1939,16 @@ out_free: return err; } -static unsigned int run_filter(const struct sk_buff *skb, - const struct sock *sk, - unsigned int res) +static unsigned int run_filter(struct sk_buff *skb, + const struct sock *sk, + unsigned int res) { struct sk_filter *filter; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter != NULL) - res = SK_RUN_FILTER(filter, skb); + res = bpf_prog_run_clear_cb(filter->prog, skb); rcu_read_unlock(); return res; @@ -2630,6 +2630,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) __be16 proto; unsigned char *addr; int err, reserve = 0; + struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; int vnet_hdr_len; @@ -2665,6 +2666,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + sockc.mark = sk->sk_mark; + if (msg->msg_controllen) { + err = sock_cmsg_send(sk, msg, &sockc); + if (unlikely(err)) + goto out_unlock; + } + if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; if (po->has_vnet_hdr) { @@ -2774,7 +2782,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->protocol = proto; skb->dev = dev; skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = sockc.mark; packet_pick_tx_queue(dev, skb); @@ -2903,22 +2911,40 @@ static int packet_release(struct socket *sock) * Attach a packet hook. 
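With packet_snd() above now routing control messages through sock_cmsg_send(), a raw socket can set skb->mark per sendmsg() call instead of per socket via setsockopt(SO_MARK); the packet_do_bind() rebind rework continues just below. A hedged userspace sketch of the per-call mark (assumes the core-networking half of this series is present, requires CAP_NET_ADMIN, and sends a dummy frame on lo):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>		/* htons */
#include <net/if.h>		/* if_nametoindex */
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/if_ether.h>	/* ETH_P_ALL, ETH_ALEN, ETH_ZLEN */
#include <linux/if_packet.h>	/* struct sockaddr_ll */

#ifndef SO_MARK
#define SO_MARK 36		/* value on most architectures */
#endif

int main(void)
{
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	if (fd < 0) { perror("socket"); return 1; }

	unsigned char frame[ETH_ZLEN] = { 0 };	/* dummy all-zero frame */
	struct sockaddr_ll sll = {
		.sll_family  = AF_PACKET,
		.sll_ifindex = (int)if_nametoindex("lo"),
		.sll_halen   = ETH_ALEN,
	};
	struct iovec iov = { .iov_base = frame, .iov_len = sizeof(frame) };

	uint32_t mark = 42;
	char cbuf[CMSG_SPACE(sizeof(mark))];
	struct msghdr msg = {
		.msg_name	= &sll,
		.msg_namelen	= sizeof(sll),
		.msg_iov	= &iov,
		.msg_iovlen	= 1,
		.msg_control	= cbuf,
		.msg_controllen	= sizeof(cbuf),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type  = SO_MARK;	/* per-packet mark, not per-socket */
	cm->cmsg_len   = CMSG_LEN(sizeof(mark));
	memcpy(CMSG_DATA(cm), &mark, sizeof(mark));

	if (sendmsg(fd, &msg, 0) < 0)	/* EPERM without CAP_NET_ADMIN */
		perror("sendmsg");
	return 0;
}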
*/ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) +static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; + struct net_device *dev = NULL; + int ret = 0; + bool unlisted = false; - if (po->fanout) { - if (dev) - dev_put(dev); - + if (po->fanout) return -EINVAL; - } lock_sock(sk); spin_lock(&po->bind_lock); + rcu_read_lock(); + + if (name) { + dev = dev_get_by_name_rcu(sock_net(sk), name); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } else if (ifindex) { + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } + + if (dev) + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -2926,14 +2952,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { - unregister_prot_hook(sk, true); + if (po->running) { + rcu_read_unlock(); + __unregister_prot_hook(sk, true); + rcu_read_lock(); + dev_curr = po->prot_hook.dev; + if (dev) + unlisted = !dev_get_by_index_rcu(sock_net(sk), + dev->ifindex); + } po->num = proto; po->prot_hook.type = proto; - po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; - packet_cached_dev_assign(po, dev); + if (unlikely(unlisted)) { + dev_put(dev); + po->prot_hook.dev = NULL; + po->ifindex = -1; + packet_cached_dev_reset(po); + } else { + po->prot_hook.dev = dev; + po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } } if (dev_curr) dev_put(dev_curr); @@ -2941,7 +2982,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) if (proto == 0 || !need_rehook) goto out_unlock; - if (!dev || (dev->flags & IFF_UP)) { + if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; @@ -2950,9 +2991,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) } out_unlock: + rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); - return 0; + return ret; } /* @@ -2964,8 +3006,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; char name[15]; - struct net_device *dev; - int err = -ENODEV; /* * Check legality @@ -2975,19 +3015,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; strlcpy(name, uaddr->sa_data, sizeof(name)); - dev = dev_get_by_name(sock_net(sk), name); - if (dev) - err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - return err; + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; - struct net_device *dev = NULL; - int err; - /* * Check legality @@ -2998,16 +3032,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - if (sll->sll_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); - if (dev == NULL) - goto out; - } - err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - -out: - return err; + return packet_do_bind(sk, NULL, sll->sll_ifindex, + sll->sll_protocol ? 
: pkt_sk(sk)->num); } static struct proto packet_proto = { diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index a2f28a6d4dc5..b5476aebd68d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -72,13 +72,7 @@ static int rds_release(struct socket *sock) rds_clear_recv_queue(rs); rds_cong_remove_socket(rs); - /* - * the binding lookup hash uses rcu, we need to - * make sure we synchronize_rcu before we free our - * entry - */ rds_remove_bound(rs); - synchronize_rcu(); rds_send_drop_to(rs, NULL); rds_rdma_drop_keys(rs); @@ -579,6 +573,7 @@ static void rds_exit(void) rds_threads_exit(); rds_stats_exit(); rds_page_exit(); + rds_bind_lock_destroy(); rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); } @@ -588,9 +583,14 @@ static int rds_init(void) { int ret; - ret = rds_conn_init(); + ret = rds_bind_lock_init(); if (ret) goto out; + + ret = rds_conn_init(); + if (ret) + goto out_bind; + ret = rds_threads_init(); if (ret) goto out_conn; @@ -624,6 +624,8 @@ out_conn: rds_conn_exit(); rds_cong_exit(); rds_page_exit(); +out_bind: + rds_bind_lock_destroy(); out: return ret; } diff --git a/net/rds/bind.c b/net/rds/bind.c index dd666fb9b4e1..b22ea956522b 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -38,51 +38,16 @@ #include <linux/ratelimit.h> #include "rds.h" -#define BIND_HASH_SIZE 1024 -static struct hlist_head bind_hash_table[BIND_HASH_SIZE]; -static DEFINE_SPINLOCK(rds_bind_lock); +static struct rhashtable bind_hash_table; -static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port) -{ - return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) & - (BIND_HASH_SIZE - 1)); -} - -static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, - struct rds_sock *insert) -{ - struct rds_sock *rs; - struct hlist_head *head = hash_to_bucket(addr, port); - u64 cmp; - u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); - - rcu_read_lock(); - hlist_for_each_entry_rcu(rs, head, rs_bound_node) { - cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | - be16_to_cpu(rs->rs_bound_port); - - if (cmp == needle) { - rcu_read_unlock(); - return rs; - } - } - rcu_read_unlock(); - - if (insert) { - /* - * make sure our addr and port are set before - * we are added to the list, other people - * in rcu will find us as soon as the - * hlist_add_head_rcu is done - */ - insert->rs_bound_addr = addr; - insert->rs_bound_port = port; - rds_sock_addref(insert); - - hlist_add_head_rcu(&insert->rs_bound_node, head); - } - return NULL; -} +static struct rhashtable_params ht_parms = { + .nelem_hint = 768, + .key_len = sizeof(u64), + .key_offset = offsetof(struct rds_sock, rs_bound_key), + .head_offset = offsetof(struct rds_sock, rs_bound_node), + .max_size = 16384, + .min_size = 1024, +}; /* * Return the rds_sock bound at the given local address. 
@@ -92,10 +57,10 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port, */ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) { + u64 key = ((u64)addr << 32) | port; struct rds_sock *rs; - rs = rds_bind_lookup(addr, port, NULL); - + rs = rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms); if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) rds_sock_addref(rs); else @@ -103,15 +68,16 @@ struct rds_sock *rds_find_bound(__be32 addr, __be16 port) rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, ntohs(port)); + return rs; } /* returns -ve errno or +ve port */ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) { - unsigned long flags; int ret = -EADDRINUSE; u16 rover, last; + u64 key; if (*port != 0) { rover = be16_to_cpu(*port); @@ -121,42 +87,49 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) last = rover - 1; } - spin_lock_irqsave(&rds_bind_lock, flags); - do { if (rover == 0) rover++; - if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) { + + key = ((u64)addr << 32) | cpu_to_be16(rover); + if (rhashtable_lookup_fast(&bind_hash_table, &key, ht_parms)) + continue; + + rs->rs_bound_key = key; + rs->rs_bound_addr = addr; + rs->rs_bound_port = cpu_to_be16(rover); + rs->rs_bound_node.next = NULL; + rds_sock_addref(rs); + if (!rhashtable_insert_fast(&bind_hash_table, + &rs->rs_bound_node, ht_parms)) { *port = rs->rs_bound_port; ret = 0; rdsdebug("rs %p binding to %pI4:%d\n", rs, &addr, (int)ntohs(*port)); break; + } else { + rds_sock_put(rs); + ret = -ENOMEM; + break; } } while (rover++ != last); - spin_unlock_irqrestore(&rds_bind_lock, flags); - return ret; } void rds_remove_bound(struct rds_sock *rs) { - unsigned long flags; - spin_lock_irqsave(&rds_bind_lock, flags); + if (!rs->rs_bound_addr) + return; - if (rs->rs_bound_addr) { - rdsdebug("rs %p unbinding from %pI4:%d\n", - rs, &rs->rs_bound_addr, - ntohs(rs->rs_bound_port)); - - hlist_del_init_rcu(&rs->rs_bound_node); - rds_sock_put(rs); - rs->rs_bound_addr = 0; - } + rdsdebug("rs %p unbinding from %pI4:%d\n", + rs, &rs->rs_bound_addr, + ntohs(rs->rs_bound_port)); - spin_unlock_irqrestore(&rds_bind_lock, flags); + rhashtable_remove_fast(&bind_hash_table, &rs->rs_bound_node, ht_parms); + rds_sock_put(rs); + rs->rs_bound_addr = 0; } int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -182,7 +155,14 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; if (rs->rs_transport) { /* previously bound */ - ret = 0; + trans = rs->rs_transport; + if (trans->laddr_check(sock_net(sock->sk), + sin->sin_addr.s_addr) != 0) { + ret = -ENOPROTOOPT; + rds_remove_bound(rs); + } else { + ret = 0; + } goto out; } trans = rds_trans_get_preferred(sock_net(sock->sk), @@ -200,9 +180,15 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) out: release_sock(sk); - - /* we might have called rds_remove_bound on error */ - if (ret) - synchronize_rcu(); return ret; } + +void rds_bind_lock_destroy(void) +{ + rhashtable_destroy(&bind_hash_table); +} + +int rds_bind_lock_init(void) +{ + return rhashtable_init(&bind_hash_table, &ht_parms); +} diff --git a/net/rds/connection.c b/net/rds/connection.c index 49adeef8090c..d4564036a339 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_transport *loop_trans; unsigned long flags; int ret; - struct rds_transport *otrans = trans; - if (!is_outgoing && otrans->t_type == 
RDS_TRANS_TCP) - goto new_conn; rcu_read_lock(); conn = rds_conn_lookup(net, head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && @@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (conn) goto out; -new_conn: conn = kmem_cache_zalloc(rds_conn_slab, gfp); if (!conn) { conn = ERR_PTR(-ENOMEM); @@ -207,6 +203,7 @@ new_conn: atomic_set(&conn->c_state, RDS_CONN_DOWN); conn->c_send_gen = 0; + conn->c_outgoing = (is_outgoing ? 1 : 0); conn->c_reconnect_jiffies = 0; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); @@ -243,22 +240,13 @@ new_conn: /* Creating normal conn */ struct rds_connection *found; - if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP) - found = NULL; - else - found = rds_conn_lookup(net, head, laddr, faddr, trans); + found = rds_conn_lookup(net, head, laddr, faddr, trans); if (found) { trans->conn_free(conn->c_transport_data); kmem_cache_free(rds_conn_slab, conn); conn = found; } else { - if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) || - (otrans->t_type != RDS_TRANS_TCP)) { - /* Only the active side should be added to - * reconnect list for TCP. - */ - hlist_add_head_rcu(&conn->c_hash_node, head); - } + hlist_add_head_rcu(&conn->c_hash_node, head); rds_cong_add_conn(conn); rds_conn_count++; } @@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn) rcu_read_lock(); if (!hlist_unhashed(&conn->c_hash_node)) { rcu_read_unlock(); - rds_queue_reconnect(conn); + if (conn->c_trans->t_type != RDS_TRANS_TCP || + conn->c_outgoing == 1) + rds_queue_reconnect(conn); } else { rcu_read_unlock(); } diff --git a/net/rds/ib.c b/net/rds/ib.c index 2d3f2ab475df..f222885ac0c7 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -43,14 +43,14 @@ #include "rds.h" #include "ib.h" -static unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE; -unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */ +unsigned int rds_ib_fmr_1m_pool_size = RDS_FMR_1M_POOL_SIZE; +unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE; unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; -module_param(fmr_pool_size, int, 0444); -MODULE_PARM_DESC(fmr_pool_size, " Max number of fmr per HCA"); -module_param(fmr_message_size, int, 0444); -MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); +module_param(rds_ib_fmr_1m_pool_size, int, 0444); +MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1M fmr per HCA"); +module_param(rds_ib_fmr_8k_pool_size, int, 0444); +MODULE_PARM_DESC(rds_ib_fmr_8k_pool_size, " Max number of 8K fmr per HCA"); module_param(rds_ib_retry_count, int, 0444); MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); @@ -97,8 +97,10 @@ static void rds_ib_dev_free(struct work_struct *work) struct rds_ib_device *rds_ibdev = container_of(work, struct rds_ib_device, free_work); - if (rds_ibdev->mr_pool) - rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); + if (rds_ibdev->mr_8k_pool) + rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool); + if (rds_ibdev->mr_1m_pool) + rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); @@ -148,9 +150,13 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; - rds_ibdev->max_fmrs = dev_attr->max_fmr ? 
- min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : - fmr_pool_size; + rds_ibdev->max_1m_fmrs = dev_attr->max_mr ? + min_t(unsigned int, (dev_attr->max_mr / 2), + rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size; + + rds_ibdev->max_8k_fmrs = dev_attr->max_mr ? + min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE), + rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size; rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom; rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom; @@ -162,12 +168,25 @@ static void rds_ib_add_one(struct ib_device *device) goto put_dev; } - rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); - if (IS_ERR(rds_ibdev->mr_pool)) { - rds_ibdev->mr_pool = NULL; + rds_ibdev->mr_1m_pool = + rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); + if (IS_ERR(rds_ibdev->mr_1m_pool)) { + rds_ibdev->mr_1m_pool = NULL; goto put_dev; } + rds_ibdev->mr_8k_pool = + rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL); + if (IS_ERR(rds_ibdev->mr_8k_pool)) { + rds_ibdev->mr_8k_pool = NULL; + goto put_dev; + } + + rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n", + dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, + rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs, + rds_ibdev->max_8k_fmrs); + INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); INIT_LIST_HEAD(&rds_ibdev->conn_list); @@ -317,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib.h b/net/rds/ib.h index aae60fda77f6..b3fdebb57460 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -9,8 +9,11 @@ #include "rds.h" #include "rdma_transport.h" -#define RDS_FMR_SIZE 256 -#define RDS_FMR_POOL_SIZE 8192 +#define RDS_FMR_1M_POOL_SIZE (8192 / 2) +#define RDS_FMR_1M_MSG_SIZE 256 +#define RDS_FMR_8K_MSG_SIZE 2 +#define RDS_MR_8K_SCALE (256 / (RDS_FMR_8K_MSG_SIZE + 1)) +#define RDS_FMR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2)) #define RDS_IB_MAX_SGE 8 #define RDS_IB_RECV_SGE 2 @@ -24,6 +27,9 @@ #define RDS_IB_RECYCLE_BATCH_COUNT 32 +#define RDS_IB_WC_MAX 32 +#define RDS_IB_SEND_OP BIT_ULL(63) + extern struct rw_semaphore rds_ib_devices_lock; extern struct list_head rds_ib_devices; @@ -69,7 +75,11 @@ struct rds_ib_connect_private { struct rds_ib_send_work { void *s_op; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_atomic_wr s_atomic_wr; + }; struct ib_sge s_sge[RDS_IB_MAX_SGE]; unsigned long s_queued; }; @@ -89,6 +99,20 @@ struct rds_ib_work_ring { atomic_t w_free_ctr; }; +/* Rings are posted with all the allocations they'll need to queue the + * incoming message to the receiving socket so this can't fail. + * All fragments start with a header, so we can make sure we're not receiving + * garbage, and we can tell a small 8 byte fragment from an ACK frame. 
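ib.h above reserves bit 63 of the 64-bit work-request ID (RDS_IB_SEND_OP) so that the combined completion path can tell send completions from receive completions, which is exactly how poll_cq() in the ib_cm.c hunk further below demultiplexes them. A standalone model of the tagging, with helper names invented here for illustration:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SEND_OP_BIT (1ULL << 63)	/* mirrors RDS_IB_SEND_OP = BIT_ULL(63) */

/* Ring indexes fit comfortably below bit 63, so the top bit is free to
 * carry the send/recv distinction through the completion queue. */
static uint64_t make_send_wr_id(uint32_t ring_index)
{
	return (uint64_t)ring_index | SEND_OP_BIT;
}

static uint64_t make_recv_wr_id(uint32_t ring_index)
{
	return ring_index;
}

static void dispatch(uint64_t wr_id)
{
	if (wr_id & SEND_OP_BIT)
		printf("send completion, slot %llu\n",
		       (unsigned long long)(wr_id & ~SEND_OP_BIT));
	else
		printf("recv completion, slot %llu\n",
		       (unsigned long long)wr_id);
}

int main(void)
{
	dispatch(make_send_wr_id(7));
	dispatch(make_recv_wr_id(7));
	assert(make_send_wr_id(0) != make_recv_wr_id(0));
	return 0;
}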
+ */ +struct rds_ib_ack_state { + u64 ack_next; + u64 ack_recv; + unsigned int ack_required:1; + unsigned int ack_next_valid:1; + unsigned int ack_recv_valid:1; +}; + + struct rds_ib_device; struct rds_ib_connection { @@ -102,6 +126,12 @@ struct rds_ib_connection { struct ib_pd *i_pd; struct ib_cq *i_send_cq; struct ib_cq *i_recv_cq; + struct ib_wc i_send_wc[RDS_IB_WC_MAX]; + struct ib_wc i_recv_wc[RDS_IB_WC_MAX]; + + /* interrupt handling */ + struct tasklet_struct i_send_tasklet; + struct tasklet_struct i_recv_tasklet; /* tx */ struct rds_ib_work_ring i_send_ring; @@ -112,7 +142,6 @@ struct rds_ib_connection { atomic_t i_signaled_sends; /* rx */ - struct tasklet_struct i_recv_tasklet; struct mutex i_recv_mutex; struct rds_ib_work_ring i_recv_ring; struct rds_ib_incoming *i_ibinc; @@ -164,6 +193,12 @@ struct rds_ib_connection { struct rds_ib_ipaddr { struct list_head list; __be32 ipaddr; + struct rcu_head rcu; +}; + +enum { + RDS_IB_MR_8K_POOL, + RDS_IB_MR_1M_POOL, }; struct rds_ib_device { @@ -172,9 +207,12 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct rds_ib_mr_pool *mr_pool; - unsigned int fmr_max_remaps; unsigned int max_fmrs; + struct rds_ib_mr_pool *mr_1m_pool; + struct rds_ib_mr_pool *mr_8k_pool; + unsigned int fmr_max_remaps; + unsigned int max_8k_fmrs; + unsigned int max_1m_fmrs; int max_sge; unsigned int max_wrs; unsigned int max_initiator_depth; @@ -197,14 +235,14 @@ struct rds_ib_device { struct rds_ib_statistics { uint64_t s_ib_connect_raced; uint64_t s_ib_listen_closed_stale; - uint64_t s_ib_tx_cq_call; + uint64_t s_ib_evt_handler_call; + uint64_t s_ib_tasklet_call; uint64_t s_ib_tx_cq_event; uint64_t s_ib_tx_ring_full; uint64_t s_ib_tx_throttle; uint64_t s_ib_tx_sg_mapping_failure; uint64_t s_ib_tx_stalled; uint64_t s_ib_tx_credit_updates; - uint64_t s_ib_rx_cq_call; uint64_t s_ib_rx_cq_event; uint64_t s_ib_rx_ring_empty; uint64_t s_ib_rx_refill_from_cq; @@ -216,12 +254,18 @@ struct rds_ib_statistics { uint64_t s_ib_ack_send_delayed; uint64_t s_ib_ack_send_piggybacked; uint64_t s_ib_ack_received; - uint64_t s_ib_rdma_mr_alloc; - uint64_t s_ib_rdma_mr_free; - uint64_t s_ib_rdma_mr_used; - uint64_t s_ib_rdma_mr_pool_flush; - uint64_t s_ib_rdma_mr_pool_wait; - uint64_t s_ib_rdma_mr_pool_depleted; + uint64_t s_ib_rdma_mr_8k_alloc; + uint64_t s_ib_rdma_mr_8k_free; + uint64_t s_ib_rdma_mr_8k_used; + uint64_t s_ib_rdma_mr_8k_pool_flush; + uint64_t s_ib_rdma_mr_8k_pool_wait; + uint64_t s_ib_rdma_mr_8k_pool_depleted; + uint64_t s_ib_rdma_mr_1m_alloc; + uint64_t s_ib_rdma_mr_1m_free; + uint64_t s_ib_rdma_mr_1m_used; + uint64_t s_ib_rdma_mr_1m_pool_flush; + uint64_t s_ib_rdma_mr_1m_pool_wait; + uint64_t s_ib_rdma_mr_1m_pool_depleted; uint64_t s_ib_atomic_cswp; uint64_t s_ib_atomic_fadd; }; @@ -273,7 +317,8 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device); void rds_ib_dev_put(struct rds_ib_device *rds_ibdev); extern struct ib_client rds_ib_client; -extern unsigned int fmr_message_size; +extern unsigned int rds_ib_fmr_1m_pool_size; +extern unsigned int rds_ib_fmr_8k_pool_size; extern unsigned int rds_ib_retry_count; extern spinlock_t ib_nodev_conns_lock; @@ -303,7 +348,8 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); void rds_ib_destroy_nodev_conns(void); -struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct 
rds_ib_device *); +struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev, + int npages); void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, @@ -323,7 +369,8 @@ void rds_ib_recv_free_caches(struct rds_ib_connection *ic); void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp); void rds_ib_inc_free(struct rds_incoming *inc); int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to); -void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc, + struct rds_ib_ack_state *state); void rds_ib_recv_tasklet_fn(unsigned long data); void rds_ib_recv_init_ring(struct rds_ib_connection *ic); void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); @@ -331,6 +378,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic); void rds_ib_attempt_ack(struct rds_ib_connection *ic); void rds_ib_ack_send_complete(struct rds_ib_connection *ic); u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic); +void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required); /* ib_ring.c */ void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr); @@ -348,7 +396,7 @@ extern wait_queue_head_t rds_ib_ring_empty_wait; void rds_ib_xmit_complete(struct rds_connection *conn); int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, unsigned int hdr_off, unsigned int sg, unsigned int off); -void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc); void rds_ib_send_init_ring(struct rds_ib_connection *ic); void rds_ib_send_clear_ring(struct rds_ib_connection *ic); int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 9043f5c04787..da5a7fb98c77 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -216,6 +216,96 @@ static void rds_ib_cq_event_handler(struct ib_event *event, void *data) event->event, ib_event_msg(event->event), data); } +/* Plucking the oldest entry from the ring can be done concurrently with + * the thread refilling the ring. Each ring operation is protected by + * spinlocks and the transient state of refilling doesn't change the + * recording of which entry is oldest. + * + * This relies on IB only calling one cq comp_handler for each cq so that + * there will only be one caller of rds_recv_incoming() per RDS connection. 
+ */ +static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context) +{ + struct rds_connection *conn = context; + struct rds_ib_connection *ic = conn->c_transport_data; + + rdsdebug("conn %p cq %p\n", conn, cq); + + rds_ib_stats_inc(s_ib_evt_handler_call); + + tasklet_schedule(&ic->i_recv_tasklet); +} + +static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq, + struct ib_wc *wcs, + struct rds_ib_ack_state *ack_state) +{ + int nr; + int i; + struct ib_wc *wc; + + while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) { + for (i = 0; i < nr; i++) { + wc = wcs + i; + rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", + (unsigned long long)wc->wr_id, wc->status, + wc->byte_len, be32_to_cpu(wc->ex.imm_data)); + + if (wc->wr_id & RDS_IB_SEND_OP) + rds_ib_send_cqe_handler(ic, wc); + else + rds_ib_recv_cqe_handler(ic, wc, ack_state); + } + } +} + +static void rds_ib_tasklet_fn_send(unsigned long data) +{ + struct rds_ib_connection *ic = (struct rds_ib_connection *)data; + struct rds_connection *conn = ic->conn; + struct rds_ib_ack_state state; + + rds_ib_stats_inc(s_ib_tasklet_call); + + memset(&state, 0, sizeof(state)); + poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state); + ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); + poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state); + + if (rds_conn_up(conn) && + (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) || + test_bit(0, &conn->c_map_queued))) + rds_send_xmit(ic->conn); +} + +static void rds_ib_tasklet_fn_recv(unsigned long data) +{ + struct rds_ib_connection *ic = (struct rds_ib_connection *)data; + struct rds_connection *conn = ic->conn; + struct rds_ib_device *rds_ibdev = ic->rds_ibdev; + struct rds_ib_ack_state state; + + if (!rds_ibdev) + rds_conn_drop(conn); + + rds_ib_stats_inc(s_ib_tasklet_call); + + memset(&state, 0, sizeof(state)); + poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); + ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); + poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state); + + if (state.ack_next_valid) + rds_ib_set_ack(ic, state.ack_next, state.ack_required); + if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { + rds_send_drop_acked(conn, state.ack_recv, NULL); + ic->i_ack_recv = state.ack_recv; + } + + if (rds_conn_up(conn)) + rds_ib_attempt_ack(ic); +} + static void rds_ib_qp_event_handler(struct ib_event *event, void *data) { struct rds_connection *conn = data; @@ -238,6 +328,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data) } } +static void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context) +{ + struct rds_connection *conn = context; + struct rds_ib_connection *ic = conn->c_transport_data; + + rdsdebug("conn %p cq %p\n", conn, cq); + + rds_ib_stats_inc(s_ib_evt_handler_call); + + tasklet_schedule(&ic->i_send_tasklet); +} + /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. 
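rds_ib_tasklet_fn_send() and rds_ib_tasklet_fn_recv() above both follow the classic drain / re-arm / drain-again shape: a completion that lands between the first poll and ib_req_notify_cq() raises no interrupt, so the second poll pass is what keeps it from being stranded until unrelated traffic arrives. A toy model of the idiom (the queue and names are stand-ins, not IB verbs):

#include <stdio.h>

/* Toy completion queue: poll_batch() returns how many entries it drained,
 * arm() requests an interrupt for the next arrival. These stand in for
 * ib_poll_cq() and ib_req_notify_cq(). */
static int pending = 5;

static int poll_batch(int max)
{
	int n = pending < max ? pending : max;

	pending -= n;
	return n;
}

static void arm(void)
{
	/* Model an entry landing just before notification is re-armed:
	 * its interrupt was already consumed, so only a second poll
	 * pass can reap it promptly. */
	pending += 1;
}

static void tasklet_body(void)
{
	int n;

	while ((n = poll_batch(32)) > 0)	/* first drain */
		printf("drained %d\n", n);
	arm();					/* re-enable notification */
	while ((n = poll_batch(32)) > 0)	/* close the race window */
		printf("late drain %d\n", n);
}

int main(void)
{
	tasklet_body();
	return 0;
}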
@@ -271,7 +373,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn) ic->i_pd = rds_ibdev->pd; cq_attr.cqe = ic->i_send_ring.w_nr + 1; - ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, + + ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send, rds_ib_cq_event_handler, conn, &cq_attr); if (IS_ERR(ic->i_send_cq)) { @@ -282,7 +385,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) } cq_attr.cqe = ic->i_recv_ring.w_nr; - ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, + ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv, rds_ib_cq_event_handler, conn, &cq_attr); if (IS_ERR(ic->i_recv_cq)) { @@ -565,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); @@ -637,6 +740,7 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) wait_event(rds_ib_ring_empty_wait, rds_ib_ring_empty(&ic->i_recv_ring) && (atomic_read(&ic->i_signaled_sends) == 0)); + tasklet_kill(&ic->i_send_tasklet); tasklet_kill(&ic->i_recv_tasklet); /* first destroy the ib state that generates callbacks */ @@ -743,8 +847,10 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) } INIT_LIST_HEAD(&ic->ib_node); - tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, - (unsigned long) ic); + tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send, + (unsigned long)ic); + tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv, + (unsigned long)ic); mutex_init(&ic->i_recv_mutex); #ifndef KERNEL_HAS_ATOMIC64 spin_lock_init(&ic->i_ack_lock); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 251d1ce0b7c7..a2340748ec86 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -65,6 +65,7 @@ struct rds_ib_mr { * Our own little FMR pool */ struct rds_ib_mr_pool { + unsigned int pool_type; struct mutex flush_lock; /* serialize fmr invalidate */ struct delayed_work flush_worker; /* flush worker */ @@ -83,7 +84,7 @@ struct rds_ib_mr_pool { struct ib_fmr_attr fmr_attr; }; -struct workqueue_struct *rds_ib_fmr_wq; +static struct workqueue_struct *rds_ib_fmr_wq; int rds_ib_fmr_init(void) { @@ -159,10 +160,8 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) } spin_unlock_irq(&rds_ibdev->spinlock); - if (to_free) { - synchronize_rcu(); - kfree(to_free); - } + if (to_free) + kfree_rcu(to_free, rcu); } int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) @@ -236,7 +235,8 @@ void rds_ib_destroy_nodev_conns(void) rds_conn_destroy(ic->conn); } -struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) +struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, + int pool_type) { struct rds_ib_mr_pool *pool; @@ -244,6 +244,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) if (!pool) return ERR_PTR(-ENOMEM); + pool->pool_type = pool_type; init_llist_head(&pool->free_list); init_llist_head(&pool->drop_list); init_llist_head(&pool->clean_list); @@ -251,28 +252,30 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) init_waitqueue_head(&pool->flush_wait); INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); - pool->fmr_attr.max_pages = fmr_message_size; + if (pool_type == 
RDS_IB_MR_1M_POOL) { + /* +1 allows for unaligned MRs */ + pool->fmr_attr.max_pages = RDS_FMR_1M_MSG_SIZE + 1; + pool->max_items = RDS_FMR_1M_POOL_SIZE; + } else { + /* pool_type == RDS_IB_MR_8K_POOL */ + pool->fmr_attr.max_pages = RDS_FMR_8K_MSG_SIZE + 1; + pool->max_items = RDS_FMR_8K_POOL_SIZE; + } + + pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; pool->fmr_attr.page_shift = PAGE_SHIFT; - pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4; - - /* We never allow more than max_items MRs to be allocated. - * When we exceed more than max_items_soft, we start freeing - * items more aggressively. - * Make sure that max_items > max_items_soft > max_items / 2 - */ pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4; - pool->max_items = rds_ibdev->max_fmrs; return pool; } void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) { - struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; - iinfo->rdma_mr_max = pool->max_items; - iinfo->rdma_mr_size = pool->fmr_attr.max_pages; + iinfo->rdma_mr_max = pool_1m->max_items; + iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages; } void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) @@ -314,14 +317,28 @@ static inline void wait_clean_list_grace(void) } } -static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) +static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, + int npages) { - struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + struct rds_ib_mr_pool *pool; struct rds_ib_mr *ibmr = NULL; int err = 0, iter = 0; + if (npages <= RDS_FMR_8K_MSG_SIZE) + pool = rds_ibdev->mr_8k_pool; + else + pool = rds_ibdev->mr_1m_pool; + if (atomic_read(&pool->dirty_count) >= pool->max_items / 10) - schedule_delayed_work(&pool->flush_worker, 10); + queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10); + + /* Switch pools if one of the pool is reaching upper limit */ + if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) { + if (pool->pool_type == RDS_IB_MR_8K_POOL) + pool = rds_ibdev->mr_1m_pool; + else + pool = rds_ibdev->mr_8k_pool; + } while (1) { ibmr = rds_ib_reuse_fmr(pool); @@ -343,12 +360,18 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) atomic_dec(&pool->item_count); if (++iter > 2) { - rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted); + if (pool->pool_type == RDS_IB_MR_8K_POOL) + rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); + else + rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); return ERR_PTR(-EAGAIN); } /* We do have some empty MRs. Flush them out. 
*/ - rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); + if (pool->pool_type == RDS_IB_MR_8K_POOL) + rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait); + else + rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait); rds_ib_flush_mr_pool(pool, 0, &ibmr); if (ibmr) return ibmr; @@ -373,7 +396,12 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) goto out_no_cigar; } - rds_ib_stats_inc(s_ib_rdma_mr_alloc); + ibmr->pool = pool; + if (pool->pool_type == RDS_IB_MR_8K_POOL) + rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc); + else + rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc); + return ibmr; out_no_cigar: @@ -429,7 +457,7 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm } page_cnt += len >> PAGE_SHIFT; - if (page_cnt > fmr_message_size) + if (page_cnt > ibmr->pool->fmr_attr.max_pages) return -EINVAL; dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, @@ -461,7 +489,10 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm ibmr->sg_dma_len = sg_dma_len; ibmr->remap_count++; - rds_ib_stats_inc(s_ib_rdma_mr_used); + if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) + rds_ib_stats_inc(s_ib_rdma_mr_8k_used); + else + rds_ib_stats_inc(s_ib_rdma_mr_1m_used); ret = 0; out: @@ -524,8 +555,7 @@ static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) __rds_ib_teardown_mr(ibmr); if (pinned) { - struct rds_ib_device *rds_ibdev = ibmr->device; - struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + struct rds_ib_mr_pool *pool = ibmr->pool; atomic_sub(pinned, &pool->free_pinned); } @@ -594,7 +624,7 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool, * to free as many MRs as needed to get back to this limit. */ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, - int free_all, struct rds_ib_mr **ibmr_ret) + int free_all, struct rds_ib_mr **ibmr_ret) { struct rds_ib_mr *ibmr, *next; struct llist_node *clean_nodes; @@ -605,11 +635,14 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; int ret = 0; - rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); + if (pool->pool_type == RDS_IB_MR_8K_POOL) + rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); + else + rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush); if (ibmr_ret) { DEFINE_WAIT(wait); - while(!mutex_trylock(&pool->flush_lock)) { + while (!mutex_trylock(&pool->flush_lock)) { ibmr = rds_ib_reuse_fmr(pool); if (ibmr) { *ibmr_ret = ibmr; @@ -666,8 +699,12 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) { unpinned += ibmr->sg_len; __rds_ib_teardown_mr(ibmr); - if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { - rds_ib_stats_inc(s_ib_rdma_mr_free); + if (nfreed < free_goal || + ibmr->remap_count >= pool->fmr_attr.max_maps) { + if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) + rds_ib_stats_inc(s_ib_rdma_mr_8k_free); + else + rds_ib_stats_inc(s_ib_rdma_mr_1m_free); list_del(&ibmr->unmap_list); ib_dealloc_fmr(ibmr->fmr); kfree(ibmr); @@ -719,8 +756,8 @@ static void rds_ib_mr_pool_flush_worker(struct work_struct *work) void rds_ib_free_mr(void *trans_private, int invalidate) { struct rds_ib_mr *ibmr = trans_private; + struct rds_ib_mr_pool *pool = ibmr->pool; struct rds_ib_device *rds_ibdev = ibmr->device; - struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); @@ -759,10 +796,11 @@ void rds_ib_flush_mrs(void) down_read(&rds_ib_devices_lock); list_for_each_entry(rds_ibdev, &rds_ib_devices, 
list) { - struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + if (rds_ibdev->mr_8k_pool) + rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL); - if (pool) - rds_ib_flush_mr_pool(pool, 0, NULL); + if (rds_ibdev->mr_1m_pool) + rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL); } up_read(&rds_ib_devices_lock); } @@ -780,12 +818,12 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, goto out; } - if (!rds_ibdev->mr_pool) { + if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { ret = -ENODEV; goto out; } - ibmr = rds_ib_alloc_fmr(rds_ibdev); + ibmr = rds_ib_alloc_fmr(rds_ibdev, nents); if (IS_ERR(ibmr)) { rds_ib_dev_put(rds_ibdev); return ibmr; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index f43831e4186a..977fb86065b7 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -305,7 +305,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, gfp_t slab_mask = GFP_NOWAIT; gfp_t page_mask = GFP_NOWAIT; - if (gfp & __GFP_WAIT) { + if (gfp & __GFP_DIRECT_RECLAIM) { slab_mask = GFP_KERNEL; page_mask = GFP_HIGHUSER; } @@ -379,7 +379,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) struct ib_recv_wr *failed_wr; unsigned int posted = 0; int ret = 0; - bool can_wait = !!(gfp & __GFP_WAIT); + bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); u32 pos; /* the goal here is to just make sure that someone, somewhere @@ -596,8 +596,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) * wr_id and avoids working with the ring in that case. */ #ifndef KERNEL_HAS_ATOMIC64 -static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, - int ack_required) +void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { unsigned long flags; @@ -622,8 +621,7 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) return seq; } #else -static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, - int ack_required) +void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { atomic64_set(&ic->i_ack_next, seq); if (ack_required) { @@ -830,20 +828,6 @@ static void rds_ib_cong_recv(struct rds_connection *conn, rds_cong_map_updated(map, uncongested); } -/* - * Rings are posted with all the allocations they'll need to queue the - * incoming message to the receiving socket so this can't fail. - * All fragments start with a header, so we can make sure we're not receiving - * garbage, and we can tell a small 8 byte fragment from an ACK frame. - */ -struct rds_ib_ack_state { - u64 ack_next; - u64 ack_recv; - unsigned int ack_required:1; - unsigned int ack_next_valid:1; - unsigned int ack_recv_valid:1; -}; - static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_recv_work *recv, u32 data_len, struct rds_ib_ack_state *state) @@ -969,96 +953,50 @@ static void rds_ib_process_recv(struct rds_connection *conn, } } -/* - * Plucking the oldest entry from the ring can be done concurrently with - * the thread refilling the ring. Each ring operation is protected by - * spinlocks and the transient state of refilling doesn't change the - * recording of which entry is oldest. - * - * This relies on IB only calling one cq comp_handler for each cq so that - * there will only be one caller of rds_recv_incoming() per RDS connection. 
- */ -void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) -{ - struct rds_connection *conn = context; - struct rds_ib_connection *ic = conn->c_transport_data; - - rdsdebug("conn %p cq %p\n", conn, cq); - - rds_ib_stats_inc(s_ib_rx_cq_call); - - tasklet_schedule(&ic->i_recv_tasklet); -} - -static inline void rds_poll_cq(struct rds_ib_connection *ic, - struct rds_ib_ack_state *state) +void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, + struct ib_wc *wc, + struct rds_ib_ack_state *state) { struct rds_connection *conn = ic->conn; - struct ib_wc wc; struct rds_ib_recv_work *recv; - while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { - rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", - (unsigned long long)wc.wr_id, wc.status, - ib_wc_status_msg(wc.status), wc.byte_len, - be32_to_cpu(wc.ex.imm_data)); - rds_ib_stats_inc(s_ib_rx_cq_event); + rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", + (unsigned long long)wc->wr_id, wc->status, + ib_wc_status_msg(wc->status), wc->byte_len, + be32_to_cpu(wc->ex.imm_data)); - recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; - - ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE); - - /* - * Also process recvs in connecting state because it is possible - * to get a recv completion _before_ the rdmacm ESTABLISHED - * event is processed. - */ - if (wc.status == IB_WC_SUCCESS) { - rds_ib_process_recv(conn, recv, wc.byte_len, state); - } else { - /* We expect errors as the qp is drained during shutdown */ - if (rds_conn_up(conn) || rds_conn_connecting(conn)) - rds_ib_conn_error(conn, "recv completion on %pI4 had " - "status %u (%s), disconnecting and " - "reconnecting\n", &conn->c_faddr, - wc.status, - ib_wc_status_msg(wc.status)); - } + rds_ib_stats_inc(s_ib_rx_cq_event); + recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; + ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, + DMA_FROM_DEVICE); - /* - * rds_ib_process_recv() doesn't always consume the frag, and - * we might not have called it at all if the wc didn't indicate - * success. We already unmapped the frag's pages, though, and - * the following rds_ib_ring_free() call tells the refill path - * that it will not find an allocated frag here. Make sure we - * keep that promise by freeing a frag that's still on the ring. - */ - if (recv->r_frag) { - rds_ib_frag_free(ic, recv->r_frag); - recv->r_frag = NULL; - } - rds_ib_ring_free(&ic->i_recv_ring, 1); + /* Also process recvs in connecting state because it is possible + * to get a recv completion _before_ the rdmacm ESTABLISHED + * event is processed. 
+ */ + if (wc->status == IB_WC_SUCCESS) { + rds_ib_process_recv(conn, recv, wc->byte_len, state); + } else { + /* We expect errors as the qp is drained during shutdown */ + if (rds_conn_up(conn) || rds_conn_connecting(conn)) + rds_ib_conn_error(conn, "recv completion on %pI4 had status %u (%s), disconnecting and reconnecting\n", + &conn->c_faddr, + wc->status, + ib_wc_status_msg(wc->status)); } -} -void rds_ib_recv_tasklet_fn(unsigned long data) -{ - struct rds_ib_connection *ic = (struct rds_ib_connection *) data; - struct rds_connection *conn = ic->conn; - struct rds_ib_ack_state state = { 0, }; - - rds_poll_cq(ic, &state); - ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); - rds_poll_cq(ic, &state); - - if (state.ack_next_valid) - rds_ib_set_ack(ic, state.ack_next, state.ack_required); - if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { - rds_send_drop_acked(conn, state.ack_recv, NULL); - ic->i_ack_recv = state.ack_recv; + /* rds_ib_process_recv() doesn't always consume the frag, and + * we might not have called it at all if the wc didn't indicate + * success. We already unmapped the frag's pages, though, and + * the following rds_ib_ring_free() call tells the refill path + * that it will not find an allocated frag here. Make sure we + * keep that promise by freeing a frag that's still on the ring. + */ + if (recv->r_frag) { + rds_ib_frag_free(ic, recv->r_frag); + recv->r_frag = NULL; } - if (rds_conn_up(conn)) - rds_ib_attempt_ack(ic); + rds_ib_ring_free(&ic->i_recv_ring, 1); /* If we ever end up with a really empty receive ring, we're * in deep trouble, as the sender will definitely see RNR diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 4e88047086b6..eac30bf486d7 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -195,7 +195,7 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic) send->s_op = NULL; - send->s_wr.wr_id = i; + send->s_wr.wr_id = i | RDS_IB_SEND_OP; send->s_wr.sg_list = send->s_sge; send->s_wr.ex.imm_data = 0; @@ -237,81 +237,73 @@ static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr) * unallocs the next free entry in the ring it doesn't alter which is * the next to be freed, which is what this is concerned with. 
*/ -void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) +void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) { - struct rds_connection *conn = context; - struct rds_ib_connection *ic = conn->c_transport_data; struct rds_message *rm = NULL; - struct ib_wc wc; + struct rds_connection *conn = ic->conn; struct rds_ib_send_work *send; u32 completed; u32 oldest; u32 i = 0; - int ret; int nr_sig = 0; - rdsdebug("cq %p conn %p\n", cq, conn); - rds_ib_stats_inc(s_ib_tx_cq_call); - ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (ret) - rdsdebug("ib_req_notify_cq send failed: %d\n", ret); - - while (ib_poll_cq(cq, 1, &wc) > 0) { - rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", - (unsigned long long)wc.wr_id, wc.status, - ib_wc_status_msg(wc.status), wc.byte_len, - be32_to_cpu(wc.ex.imm_data)); - rds_ib_stats_inc(s_ib_tx_cq_event); - - if (wc.wr_id == RDS_IB_ACK_WR_ID) { - if (time_after(jiffies, ic->i_ack_queued + HZ/2)) - rds_ib_stats_inc(s_ib_tx_stalled); - rds_ib_ack_send_complete(ic); - continue; - } - oldest = rds_ib_ring_oldest(&ic->i_send_ring); + rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", + (unsigned long long)wc->wr_id, wc->status, + ib_wc_status_msg(wc->status), wc->byte_len, + be32_to_cpu(wc->ex.imm_data)); + rds_ib_stats_inc(s_ib_tx_cq_event); - completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); + if (wc->wr_id == RDS_IB_ACK_WR_ID) { + if (time_after(jiffies, ic->i_ack_queued + HZ / 2)) + rds_ib_stats_inc(s_ib_tx_stalled); + rds_ib_ack_send_complete(ic); + return; + } - for (i = 0; i < completed; i++) { - send = &ic->i_sends[oldest]; - if (send->s_wr.send_flags & IB_SEND_SIGNALED) - nr_sig++; + oldest = rds_ib_ring_oldest(&ic->i_send_ring); - rm = rds_ib_send_unmap_op(ic, send, wc.status); + completed = rds_ib_ring_completed(&ic->i_send_ring, + (wc->wr_id & ~RDS_IB_SEND_OP), + oldest); - if (time_after(jiffies, send->s_queued + HZ/2)) - rds_ib_stats_inc(s_ib_tx_stalled); + for (i = 0; i < completed; i++) { + send = &ic->i_sends[oldest]; + if (send->s_wr.send_flags & IB_SEND_SIGNALED) + nr_sig++; - if (send->s_op) { - if (send->s_op == rm->m_final_op) { - /* If anyone waited for this message to get flushed out, wake - * them up now */ - rds_message_unmapped(rm); - } - rds_message_put(rm); - send->s_op = NULL; - } + rm = rds_ib_send_unmap_op(ic, send, wc->status); - oldest = (oldest + 1) % ic->i_send_ring.w_nr; - } + if (time_after(jiffies, send->s_queued + HZ / 2)) + rds_ib_stats_inc(s_ib_tx_stalled); - rds_ib_ring_free(&ic->i_send_ring, completed); - rds_ib_sub_signaled(ic, nr_sig); - nr_sig = 0; - - if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || - test_bit(0, &conn->c_map_queued)) - queue_delayed_work(rds_wq, &conn->c_send_w, 0); - - /* We expect errors as the qp is drained during shutdown */ - if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { - rds_ib_conn_error(conn, "send completion on %pI4 had status " - "%u (%s), disconnecting and reconnecting\n", - &conn->c_faddr, wc.status, - ib_wc_status_msg(wc.status)); + if (send->s_op) { + if (send->s_op == rm->m_final_op) { + /* If anyone waited for this message to get + * flushed out, wake them up now + */ + rds_message_unmapped(rm); + } + rds_message_put(rm); + send->s_op = NULL; } + + oldest = (oldest + 1) % ic->i_send_ring.w_nr; + } + + rds_ib_ring_free(&ic->i_send_ring, completed); + rds_ib_sub_signaled(ic, nr_sig); + nr_sig = 0; + + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || + test_bit(0, 
&conn->c_map_queued)) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + + /* We expect errors as the qp is drained during shutdown */ + if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) { + rds_ib_conn_error(conn, "send completion on %pI4 had status %u (%s), disconnecting and reconnecting\n", + &conn->c_faddr, wc->status, + ib_wc_status_msg(wc->status)); } } @@ -785,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_queued = jiffies; if (op->op_type == RDS_ATOMIC_TYPE_CSWP) { - send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; - send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare; - send->s_wr.wr.atomic.swap = op->op_m_cswp.swap; - send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask; - send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask; + send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP; + send->s_atomic_wr.compare_add = op->op_m_cswp.compare; + send->s_atomic_wr.swap = op->op_m_cswp.swap; + send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask; + send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask; } else { /* FADD */ - send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; - send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add; - send->s_wr.wr.atomic.swap = 0; - send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask; - send->s_wr.wr.atomic.swap_mask = 0; + send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD; + send->s_atomic_wr.compare_add = op->op_m_fadd.add; + send->s_atomic_wr.swap = 0; + send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; + send->s_atomic_wr.swap_mask = 0; } nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); - send->s_wr.num_sge = 1; - send->s_wr.next = NULL; - send->s_wr.wr.atomic.remote_addr = op->op_remote_addr; - send->s_wr.wr.atomic.rkey = op->op_rkey; + send->s_atomic_wr.wr.num_sge = 1; + send->s_atomic_wr.wr.next = NULL; + send->s_atomic_wr.remote_addr = op->op_remote_addr; + send->s_atomic_wr.rkey = op->op_rkey; send->s_op = op; rds_message_addref(container_of(send->s_op, struct rds_message, atomic)); @@ -826,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) if (nr_sig) atomic_add(nr_sig, &ic->i_signaled_sends); - failed_wr = &send->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr); + failed_wr = &send->s_atomic_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr); rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic, - send, &send->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &send->s_wr); + send, &send->s_atomic_wr, ret, failed_wr); + BUG_ON(failed_wr != &send->s_atomic_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -839,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) goto out; } - if (unlikely(failed_wr != &send->s_wr)) { + if (unlikely(failed_wr != &send->s_atomic_wr.wr)) { printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret); - BUG_ON(failed_wr != &send->s_wr); + BUG_ON(failed_wr != &send->s_atomic_wr.wr); } out: @@ -912,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_wr.opcode = op->op_write ? 
IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; - send->s_wr.wr.rdma.remote_addr = remote_addr; - send->s_wr.wr.rdma.rkey = op->op_rkey; + send->s_rdma_wr.remote_addr = remote_addr; + send->s_rdma_wr.rkey = op->op_rkey; if (num_sge > max_sge) { - send->s_wr.num_sge = max_sge; + send->s_rdma_wr.wr.num_sge = max_sge; num_sge -= max_sge; } else { - send->s_wr.num_sge = num_sge; + send->s_rdma_wr.wr.num_sge = num_sge; } - send->s_wr.next = NULL; + send->s_rdma_wr.wr.next = NULL; if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr; - for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { + for (j = 0; j < send->s_rdma_wr.wr.num_sge && + scat != &op->op_sg[op->op_count]; j++) { len = ib_sg_dma_len(ic->i_cm_id->device, scat); send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); @@ -942,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) } rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_rdma_wr.wr, + send->s_rdma_wr.wr.num_sge, + send->s_rdma_wr.wr.next); prev = send; if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) @@ -963,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) if (nr_sig) atomic_add(nr_sig, &ic->i_signaled_sends); - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_rdma_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_rdma_wr.wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -976,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) goto out; } - if (unlikely(failed_wr != &first->s_wr)) { + if (unlikely(failed_wr != &first->s_rdma_wr.wr)) { printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); - BUG_ON(failed_wr != &first->s_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); } diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 2d5965d6e97c..d77e04473056 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -42,14 +42,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", - "ib_tx_cq_call", + "s_ib_evt_handler_call", + "ib_tasklet_call", "ib_tx_cq_event", "ib_tx_ring_full", "ib_tx_throttle", "ib_tx_sg_mapping_failure", "ib_tx_stalled", "ib_tx_credit_updates", - "ib_rx_cq_call", "ib_rx_cq_event", "ib_rx_ring_empty", "ib_rx_refill_from_cq", @@ -61,12 +61,18 @@ static const char *const rds_ib_stat_names[] = { "ib_ack_send_delayed", "ib_ack_send_piggybacked", "ib_ack_received", - "ib_rdma_mr_alloc", - "ib_rdma_mr_free", - "ib_rdma_mr_used", - "ib_rdma_mr_pool_flush", - "ib_rdma_mr_pool_wait", - "ib_rdma_mr_pool_depleted", + "ib_rdma_mr_8k_alloc", + "ib_rdma_mr_8k_free", + "ib_rdma_mr_8k_used", + "ib_rdma_mr_8k_pool_flush", + "ib_rdma_mr_8k_pool_wait", + "ib_rdma_mr_8k_pool_depleted", + "ib_rdma_mr_1m_alloc", + "ib_rdma_mr_1m_free", + "ib_rdma_mr_1m_used", + "ib_rdma_mr_1m_pool_flush", + "ib_rdma_mr_1m_pool_wait", + "ib_rdma_mr_1m_pool_depleted", "ib_atomic_cswp", "ib_atomic_fadd", }; diff --git a/net/rds/iw.c b/net/rds/iw.c index 
3df0295c6659..576f1825fc55 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw.h b/net/rds/iw.h index cbe6674e31ee..5af01d1758b3 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -74,10 +74,13 @@ struct rds_iw_send_work { struct rm_rdma_op *s_op; struct rds_iw_mapping *s_mapping; struct ib_mr *s_mr; - struct ib_fast_reg_page_list *s_page_list; unsigned char s_remap_count; - struct ib_send_wr s_wr; + union { + struct ib_send_wr s_send_wr; + struct ib_rdma_wr s_rdma_wr; + struct ib_reg_wr s_reg_wr; + }; struct ib_sge s_sge[RDS_IW_MAX_SGE]; unsigned long s_queued; }; @@ -195,7 +198,7 @@ struct rds_iw_device { /* Magic WR_ID for ACKs */ #define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) -#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) +#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL) #define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) struct rds_iw_statistics { diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a6553a6fb2bc..aea4c911bc76 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what effect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 6a8fbd6e69e7..b09a40c1adce 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -47,7 +47,6 @@ struct rds_iw_mr { struct rdma_cm_id *cm_id; struct ib_mr *mr; - struct ib_fast_reg_page_list *page_list; struct rds_iw_mapping mapping; unsigned char remap_count; @@ -75,10 +74,10 @@ struct rds_iw_mr_pool { int max_pages; }; -static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); +static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); static void rds_iw_mr_pool_flush_worker(struct work_struct *work); -static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); -static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, +static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); +static int rds_iw_map_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr, struct scatterlist *sg, unsigned int nents); static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); @@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, sg->bytes = 0; } -static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, - struct rds_iw_scatterlist *sg) +static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, + struct rds_iw_scatterlist *sg) { struct ib_device *dev = rds_iwdev->dev; - u64 *dma_pages = NULL; - int i, j, ret; + int i, ret; WARN_ON(sg->dma_len); sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); if (unlikely(!sg->dma_len)) { printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); - return ERR_PTR(-EBUSY); + return -EBUSY; } sg->bytes = 0; @@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device 
*rds_iwdev, if (sg->dma_npages > fastreg_message_size) goto out_unmap; - dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC); - if (!dma_pages) { - ret = -ENOMEM; - goto out_unmap; - } - - for (i = j = 0; i < sg->dma_len; ++i) { - unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); - u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); - u64 end_addr; - end_addr = dma_addr + dma_len; - dma_addr &= ~PAGE_MASK; - for (; dma_addr < end_addr; dma_addr += PAGE_SIZE) - dma_pages[j++] = dma_addr; - BUG_ON(j > sg->dma_npages); - } - return dma_pages; + return 0; out_unmap: ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); sg->dma_len = 0; - kfree(dma_pages); - return ERR_PTR(ret); + return ret; } @@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev) INIT_LIST_HEAD(&ibmr->mapping.m_list); ibmr->mapping.m_mr = ibmr; - err = rds_iw_init_fastreg(pool, ibmr); + err = rds_iw_init_reg(pool, ibmr); if (err) goto out_no_cigar; @@ -479,14 +460,13 @@ void rds_iw_sync_mr(void *trans_private, int direction) * If the number of MRs allocated exceeds the limit, we also try * to free as many MRs as needed to get back to this limit. */ -static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) +static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) { struct rds_iw_mr *ibmr, *next; LIST_HEAD(unmap_list); LIST_HEAD(kill_list); unsigned long flags; unsigned int nfreed = 0, ncleaned = 0, unpinned = 0; - int ret = 0; rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); @@ -538,7 +518,6 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) atomic_sub(nfreed, &pool->item_count); mutex_unlock(&pool->flush_lock); - return ret; } static void rds_iw_mr_pool_flush_worker(struct work_struct *work) @@ -622,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, ibmr->cm_id = cm_id; ibmr->device = rds_iwdev; - ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); + ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents); if (ret == 0) *key_ret = ibmr->mr->rkey; else @@ -638,7 +617,7 @@ out: } /* - * iWARP fastreg handling + * iWARP reg handling * * The life cycle of a fastreg registration is a bit different from * FMRs. @@ -650,7 +629,7 @@ out: * This creates a bit of a problem for us, as we do not have the destination * IP in GET_MR, so the connection must be setup prior to the GET_MR call for * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit - * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request + * will try to queue a LOCAL_INV (if needed) and a REG_MR work request * before queuing the SEND. When completions for these arrive, they are * dispatched and the MR has a bit set showing that RDMA can be performed. * @@ -659,11 +638,10 @@ out: * The expectation there is that this invalidation step includes ALL * PREVIOUSLY FREED MRs. */ -static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, - struct rds_iw_mr *ibmr) +static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr) { struct rds_iw_device *rds_iwdev = pool->device; - struct ib_fast_reg_page_list *page_list = NULL; struct ib_mr *mr; int err; @@ -676,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, return err; } - /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages * is not filled in. 
- */ - page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size); - if (IS_ERR(page_list)) { - err = PTR_ERR(page_list); - - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err); - ib_dereg_mr(mr); - return err; - } - - ibmr->page_list = page_list; ibmr->mr = mr; return 0; } -static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) +static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping) { struct rds_iw_mr *ibmr = mapping->m_mr; - struct ib_send_wr f_wr, *failed_wr; - int ret; + struct rds_iw_scatterlist *m_sg = &mapping->m_sg; + struct ib_reg_wr reg_wr; + struct ib_send_wr *failed_wr; + int ret, n; + + n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE); + if (unlikely(n != m_sg->len)) + return n < 0 ? n : -EINVAL; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = RDS_IW_REG_WR_ID; + reg_wr.wr.num_sge = 0; + reg_wr.mr = ibmr->mr; + reg_wr.key = mapping->m_rkey; + reg_wr.access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; /* - * Perform a WR for the fast_reg_mr. Each individual page + * Perform a WR for the reg_mr. Each individual page * in the sg list is added to the fast reg page list and placed - * inside the fast_reg_mr WR. The key used is a rolling 8bit + * inside the reg_mr WR. The key used is a rolling 8bit * counter, which should guarantee uniqueness. */ ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); mapping->m_rkey = ibmr->mr->rkey; - memset(&f_wr, 0, sizeof(f_wr)); - f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; - f_wr.opcode = IB_WR_FAST_REG_MR; - f_wr.wr.fast_reg.length = mapping->m_sg.bytes; - f_wr.wr.fast_reg.rkey = mapping->m_rkey; - f_wr.wr.fast_reg.page_list = ibmr->page_list; - f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len; - f_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; - f_wr.wr.fast_reg.iova_start = 0; - f_wr.send_flags = IB_SEND_SIGNALED; - - failed_wr = &f_wr; - ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); - BUG_ON(failed_wr != &f_wr); + failed_wr = ®_wr.wr; + ret = ib_post_send(ibmr->cm_id->qp, ®_wr.wr, &failed_wr); + BUG_ON(failed_wr != ®_wr.wr); if (ret) printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", __func__, __LINE__, ret); @@ -756,21 +723,20 @@ out: return ret; } -static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, - struct rds_iw_mr *ibmr, - struct scatterlist *sg, - unsigned int sg_len) +static int rds_iw_map_reg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr, + struct scatterlist *sg, + unsigned int sg_len) { struct rds_iw_device *rds_iwdev = pool->device; struct rds_iw_mapping *mapping = &ibmr->mapping; u64 *dma_pages; - int i, ret = 0; + int ret = 0; rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); - dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); - if (IS_ERR(dma_pages)) { - ret = PTR_ERR(dma_pages); + ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg); + if (ret) { dma_pages = NULL; goto out; } @@ -780,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, goto out; } - for (i = 0; i < mapping->m_sg.dma_npages; ++i) - ibmr->page_list->page_list[i] = dma_pages[i]; - - ret = rds_iw_rdma_build_fastreg(mapping); + ret = rds_iw_rdma_reg_mr(mapping); if (ret) goto out; @@ -869,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, static void rds_iw_destroy_fastreg(struct 
rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr) { - if (ibmr->page_list) - ib_free_fast_reg_page_list(ibmr->page_list); if (ibmr->mr) ib_dereg_mr(ibmr->mr); } diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c index 86152ec3b887..e20bd503f4bd 100644 --- a/net/rds/iw_send.c +++ b/net/rds/iw_send.c @@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) send->s_op = NULL; send->s_mapping = NULL; - send->s_wr.next = NULL; - send->s_wr.wr_id = i; - send->s_wr.sg_list = send->s_sge; - send->s_wr.num_sge = 1; - send->s_wr.opcode = IB_WR_SEND; - send->s_wr.send_flags = 0; - send->s_wr.ex.imm_data = 0; + send->s_send_wr.next = NULL; + send->s_send_wr.wr_id = i; + send->s_send_wr.sg_list = send->s_sge; + send->s_send_wr.num_sge = 1; + send->s_send_wr.opcode = IB_WR_SEND; + send->s_send_wr.send_flags = 0; + send->s_send_wr.ex.imm_data = 0; sge = rds_iw_data_sge(ic, send->s_sge); sge->lkey = 0; @@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic) printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n"); break; } - - send->s_page_list = ib_alloc_fast_reg_page_list( - ic->i_cm_id->device, fastreg_message_size); - if (IS_ERR(send->s_page_list)) { - printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n"); - break; - } } } @@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic) for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { BUG_ON(!send->s_mr); ib_dereg_mr(send->s_mr); - BUG_ON(!send->s_page_list); - ib_free_fast_reg_page_list(send->s_page_list); - if (send->s_wr.opcode == 0xdead) + if (send->s_send_wr.opcode == 0xdead) continue; if (send->s_rm) rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); @@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) continue; } - if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { + if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) { ic->i_fastreg_posted = 1; continue; } @@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) send = &ic->i_sends[oldest]; /* In the error case, wc.opcode sometimes contains garbage */ - switch (send->s_wr.opcode) { + switch (send->s_send_wr.opcode) { case IB_WR_SEND: if (send->s_rm) rds_iw_send_unmap_rm(ic, send, wc.status); break; - case IB_WR_FAST_REG_MR: + case IB_WR_REG_MR: case IB_WR_RDMA_WRITE: case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: @@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) default: printk_ratelimited(KERN_NOTICE "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", - __func__, send->s_wr.opcode); + __func__, send->s_send_wr.opcode); break; } - send->s_wr.opcode = 0xdead; - send->s_wr.num_sge = 1; + send->s_send_wr.opcode = 0xdead; + send->s_send_wr.num_sge = 1; if (time_after(jiffies, send->s_queued + HZ/2)) rds_iw_stats_inc(s_iw_tx_stalled); @@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, WARN_ON(pos != send - ic->i_sends); - send->s_wr.send_flags = send_flags; - send->s_wr.opcode = IB_WR_SEND; - send->s_wr.num_sge = 2; - send->s_wr.next = NULL; + send->s_send_wr.send_flags = send_flags; + send->s_send_wr.opcode = IB_WR_SEND; + send->s_send_wr.num_sge = 2; + send->s_send_wr.next = NULL; send->s_queued = jiffies; send->s_op = NULL; @@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, } else { /* We're sending a packet with no payload. 
There is only * one SGE */ - send->s_wr.num_sge = 1; + send->s_send_wr.num_sge = 1; sge = &send->s_sge[0]; } @@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, */ if (ic->i_unsignaled_wrs-- == 0) { ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; } ic->i_unsignaled_bytes -= len; if (ic->i_unsignaled_bytes <= 0) { ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; } /* * Always signal the last one if we're stopping due to flow control. */ if (flow_controlled && i == (work_alloc-1)) - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next); sent += len; rm->data.op_dmaoff += len; @@ -722,7 +713,7 @@ add_header: } if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_send_wr.next = &send->s_send_wr; prev = send; pos = (pos + 1) % ic->i_send_ring.w_nr; @@ -736,7 +727,7 @@ add_header: /* if we finished the message then send completion owns it */ if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_rm = ic->i_rm; - prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; ic->i_rm = NULL; } @@ -748,11 +739,11 @@ add_header: rds_iw_send_add_credits(conn, credit_alloc - i); /* XXX need to worry about failed_wr and partial sends. */ - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_send_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_send_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_send_wr); if (ret) { printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); @@ -770,24 +761,26 @@ out: return ret; } -static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) +static int rds_iw_build_send_reg(struct rds_iw_send_work *send, + struct scatterlist *sg, + int sg_nents) { - BUG_ON(nent > send->s_page_list->max_page_list_len); - /* - * Perform a WR for the fast_reg_mr. Each individual page - * in the sg list is added to the fast reg page list and placed - * inside the fast_reg_mr WR. - */ - send->s_wr.opcode = IB_WR_FAST_REG_MR; - send->s_wr.wr.fast_reg.length = len; - send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; - send->s_wr.wr.fast_reg.page_list = send->s_page_list; - send->s_wr.wr.fast_reg.page_list_len = nent; - send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE; - send->s_wr.wr.fast_reg.iova_start = sg_addr; + int n; + + n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE); + if (unlikely(n != sg_nents)) + return n < 0 ? 
n : -EINVAL; + + send->s_reg_wr.wr.opcode = IB_WR_REG_MR; + send->s_reg_wr.wr.wr_id = 0; + send->s_reg_wr.wr.num_sge = 0; + send->s_reg_wr.mr = send->s_mr; + send->s_reg_wr.key = send->s_mr->rkey; + send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE; ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); + + return 0; } int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) @@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) int sent; int ret; int num_sge; + int sg_nents; rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); @@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) scat = &op->op_sg[0]; sent = 0; num_sge = op->op_count; + sg_nents = 0; for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) { - send->s_wr.send_flags = 0; + send->s_rdma_wr.wr.send_flags = 0; send->s_queued = jiffies; /* @@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) */ if (ic->i_unsignaled_wrs-- == 0) { ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; - send->s_wr.send_flags = IB_SEND_SIGNALED; + send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED; } /* To avoid the need to have the plumbing to invalidate the fastreg_mr used @@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. */ if (op->op_write) - send->s_wr.opcode = IB_WR_RDMA_WRITE; + send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE; else - send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; - send->s_wr.wr.rdma.remote_addr = remote_addr; - send->s_wr.wr.rdma.rkey = op->op_rkey; + send->s_rdma_wr.remote_addr = remote_addr; + send->s_rdma_wr.rkey = op->op_rkey; send->s_op = op; if (num_sge > rds_iwdev->max_sge) { - send->s_wr.num_sge = rds_iwdev->max_sge; + send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge; num_sge -= rds_iwdev->max_sge; } else - send->s_wr.num_sge = num_sge; + send->s_rdma_wr.wr.num_sge = num_sge; - send->s_wr.next = NULL; + send->s_rdma_wr.wr.next = NULL; if (prev) - prev->s_wr.next = &send->s_wr; + prev->s_send_wr.next = &send->s_rdma_wr.wr; - for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) { + for (j = 0; j < send->s_rdma_wr.wr.num_sge && + scat != &op->op_sg[op->op_count]; j++) { len = ib_sg_dma_len(ic->i_cm_id->device, scat); - if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) - send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); + if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) + sg_nents++; else { send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); send->s_sge[j].length = len; @@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) scat++; } - if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { - send->s_wr.num_sge = 1; + if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) { + send->s_rdma_wr.wr.num_sge = 1; send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; } rdsdebug("send %p wr %p num_sge %u next %p\n", send, - &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + &send->s_rdma_wr, + send->s_rdma_wr.wr.num_sge, + send->s_rdma_wr.wr.next); prev = send; if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) @@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct 
rds_connection *conn, struct rm_rdma_op *op) /* if we finished the message then send completion owns it */ if (scat == &op->op_sg[op->op_count]) - first->s_wr.send_flags = IB_SEND_SIGNALED; + first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED; if (i < work_alloc) { rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); @@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op) * fastreg_mr (or possibly a dma_mr) */ if (!op->op_write) { - rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], - op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); + ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos], + &op->op_sg[0], sg_nents); + if (ret) { + printk(KERN_WARNING "RDS/IW: failed to reg send mem\n"); + goto out; + } work_alloc++; } - failed_wr = &first->s_wr; - ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + failed_wr = &first->s_rdma_wr.wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr); rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, - first, &first->s_wr, ret, failed_wr); - BUG_ON(failed_wr != &first->s_wr); + first, &first->s_rdma_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_rdma_wr.wr); if (ret) { printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " "returned %d\n", &conn->c_faddr, ret); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index b9b40af5345b..9c1fed81bf0f 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, - IB_QPT_RC); + cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/rds/rds.h b/net/rds/rds.h index afb4048d0cfd..0e2797bdc316 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -7,6 +7,7 @@ #include <rdma/rdma_cm.h> #include <linux/mutex.h> #include <linux/rds.h> +#include <linux/rhashtable.h> #include "info.h" @@ -86,7 +87,9 @@ struct rds_connection { struct hlist_node c_hash_node; __be32 c_laddr; __be32 c_faddr; - unsigned int c_loopback:1; + unsigned int c_loopback:1, + c_outgoing:1, + c_pad_to_32:30; struct rds_connection *c_passive; struct rds_cong_map *c_lcong; @@ -472,7 +475,8 @@ struct rds_sock { * bound_addr used for both incoming and outgoing, no INADDR_ANY * support. */ - struct hlist_node rs_bound_node; + struct rhash_head rs_bound_node; + u64 rs_bound_key; __be32 rs_bound_addr; __be32 rs_conn_addr; __be16 rs_bound_port; @@ -603,6 +607,8 @@ extern wait_queue_head_t rds_poll_waitq; int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); void rds_remove_bound(struct rds_sock *rs); struct rds_sock *rds_find_bound(__be32 addr, __be16 port); +int rds_bind_lock_init(void); +void rds_bind_lock_destroy(void); /* cong.c */ int rds_cong_get_maps(struct rds_connection *conn); diff --git a/net/rds/send.c b/net/rds/send.c index 4df61a515b83..827155c2ead1 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -38,6 +38,7 @@ #include <linux/list.h> #include <linux/ratelimit.h> #include <linux/export.h> +#include <linux/sizes.h> #include "rds.h" @@ -51,7 +52,7 @@ * it to 0 will restore the old behavior (where we looped until we had * drained the queue). 
*/ -static int send_batch_count = 64; +static int send_batch_count = SZ_1K; module_param(send_batch_count, int, 0444); MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); @@ -223,7 +224,7 @@ restart: * through a lot of messages, lets back off and see * if anyone else jumps in */ - if (batch_count >= 1024) + if (batch_count >= send_batch_count) goto over_batch; spin_lock_irqsave(&conn->c_lock, flags); @@ -423,12 +424,15 @@ over_batch: !list_empty(&conn->c_send_queue)) && send_gen == conn->c_send_gen) { rds_stats_inc(s_send_lock_queue_raced); - goto restart; + if (batch_count < send_batch_count) + goto restart; + queue_delayed_work(rds_wq, &conn->c_send_w, 1); } } out: return ret; } +EXPORT_SYMBOL_GPL(rds_send_xmit); static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm) { @@ -1120,8 +1124,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) */ rds_stats_inc(s_send_queued); - if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) - rds_send_xmit(conn); + ret = rds_send_xmit(conn); + if (ret == -ENOMEM || ret == -EAGAIN) + queue_delayed_work(rds_wq, &conn->c_send_w, 1); rds_message_put(rm); return payload_len; @@ -1177,8 +1182,8 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) rds_stats_inc(s_send_queued); rds_stats_inc(s_send_pong); - if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) - queue_delayed_work(rds_wq, &conn->c_send_w, 0); + /* schedule the send work on rds_wq */ + queue_delayed_work(rds_wq, &conn->c_send_w, 1); rds_message_put(rm); return 0; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index c42b60bf4c68..9d6ddbacd875 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -67,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock) set_fs(oldfs); } +/* All module specific customizations to the RDS-TCP socket should be done in + * rds_tcp_tune() and applied after socket creation. In general these + * customizations should be tunable via module_param() + */ void rds_tcp_tune(struct socket *sock) { - struct sock *sk = sock->sk; - rds_tcp_nonagle(sock); - - /* - * We're trying to saturate gigabit with the default, - * see svc_sock_setbufsize(). - */ - lock_sock(sk); - sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE; - sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE; - sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; - release_sock(sk); } u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 444d78d0bd77..0936a4a32b47 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -110,28 +110,27 @@ int rds_tcp_accept_one(struct socket *sock) goto out; } /* An incoming SYN request came in, and TCP just accepted it. - * We always create a new conn for listen side of TCP, and do not - * add it to the c_hash_list. * * If the client reboots, this conn will need to be cleaned up. 
* rds_tcp_state_change() will do that cleanup */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; - WARN_ON(!rs_tcp || rs_tcp->t_sock); - - /* - * see the comment above rds_queue_delayed_reconnect() - */ - if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { - if (rds_conn_state(conn) == RDS_CONN_UP) - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - else - rds_tcp_stats_inc(s_tcp_connect_raced); - rds_conn_drop(conn); + if (rs_tcp->t_sock && + ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { + struct sock *nsk = new_sock->sk; + + nsk->sk_user_data = NULL; + nsk->sk_prot->disconnect(nsk, 0); + tcp_done(nsk); + new_sock = NULL; ret = 0; goto out; + } else if (rs_tcp->t_sock) { + rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + conn->c_outgoing = 0; } + rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); new_sock = NULL; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 53b17ca0dff5..2894e6095e3b 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -83,6 +83,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; int ret = 0; + int more; if (hdr_off == 0) { /* @@ -116,12 +117,15 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, goto out; } + more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0; while (sg < rm->data.op_nents) { + int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more; + ret = tc->t_sock->ops->sendpage(tc->t_sock, sg_page(&rm->data.op_sg[sg]), rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, - MSG_DONTWAIT|MSG_NOSIGNAL); + flags); rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]), rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off, ret); @@ -134,6 +138,8 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, off = 0; sg++; } + if (sg == rm->data.op_nents - 1) + more = 0; } out: diff --git a/net/rds/threads.c b/net/rds/threads.c index dc2402e871fd..454aa6d23327 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -162,7 +162,9 @@ void rds_send_worker(struct work_struct *work) int ret; if (rds_conn_state(conn) == RDS_CONN_UP) { + clear_bit(RDS_LL_SEND_FULL, &conn->c_flags); ret = rds_send_xmit(conn); + cond_resched(); rdsdebug("conn %p ret %d\n", conn, ret); switch (ret) { case -EAGAIN: diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 25d60ed15284..1f8a144a5dc2 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -305,7 +305,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, if (!key) key = rx->key; - if (key && !key->payload.data) + if (key && !key->payload.data[0]) key = NULL; /* a no-security key */ bundle = rxrpc_get_bundle(rx, trans, key, service_id, gfp); diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c index 6631f4f1e39b..6c71ed1caf16 100644 --- a/net/rxrpc/ar-connection.c +++ b/net/rxrpc/ar-connection.c @@ -500,7 +500,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx, if (bundle->num_conns >= 20) { _debug("too many conns"); - if (!(gfp & __GFP_WAIT)) { + if (!gfpflags_allow_blocking(gfp)) { _leave(" = -EAGAIN"); return -EAGAIN; } @@ -808,7 +808,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn) ASSERTCMP(atomic_read(&conn->usage), >, 0); - conn->put_time = get_seconds(); + conn->put_time = ktime_get_seconds(); if (atomic_dec_and_test(&conn->usage)) { _debug("zombie"); 
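The hunk above stamps put_time with the monotonic ktime_get_seconds() instead of the wall-clock get_seconds(), so a settimeofday() or NTP step can no longer make the reaper fire too early or never. A minimal userspace sketch of the same idea, assuming only POSIX clock_gettime(); the names now_secs and EXPIRY_SECS are invented for illustration, not taken from the patch:

#include <stdio.h>
#include <time.h>

#define EXPIRY_SECS 100 /* stand-in for the rxrpc reap timeout */

/* Monotonic seconds: immune to wall-clock steps, analogous to
 * ktime_get_seconds() in the hunk above. */
static long now_secs(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec;
}

int main(void)
{
        long put_time = now_secs(); /* recorded when the last ref is dropped */

        if (now_secs() - put_time >= EXPIRY_SECS)
                puts("reap: idle past deadline");
        else
                puts("keep: deadline not reached");
        return 0;
}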
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -852,7 +852,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_seconds(); earliest = ULONG_MAX; write_lock_bh(&rxrpc_connection_lock); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index aef1bd294e17..2934a73a5981 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -208,7 +208,7 @@ struct rxrpc_transport { struct rb_root server_conns; /* server connections on this transport */ struct list_head link; /* link in master session list */ struct sk_buff_head error_queue; /* error packets awaiting processing */ - time_t put_time; /* time at which to reap */ + unsigned long put_time; /* time at which to reap */ spinlock_t client_lock; /* client connection allocation lock */ rwlock_t conn_lock; /* lock for active/dead connections */ atomic_t usage; @@ -256,7 +256,7 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long events; #define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */ - time_t put_time; /* time at which to reap */ + unsigned long put_time; /* time at which to reap */ rwlock_t lock; /* access lock */ spinlock_t state_lock; /* state-change lock */ atomic_t usage; diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index db0f39f5ef96..da3cc09f683e 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -148,10 +148,10 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, token->kad->ticket[6], token->kad->ticket[7]); /* count the number of tokens attached */ - prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); /* attach the data */ - for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; *pptoken; pptoken = &(*pptoken)->next) continue; @@ -522,7 +522,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, goto inval; /* attach the payload */ - for (pptoken = (struct rxrpc_key_token **)&prep->payload[0]; + for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0]; *pptoken; pptoken = &(*pptoken)->next) continue; @@ -764,10 +764,10 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) memcpy(&token->kad->ticket, v1->ticket, v1->ticket_length); /* count the number of tokens attached */ - prep->type_data[0] = (void *)((unsigned long)prep->type_data[0] + 1); + prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1); /* attach the data */ - pp = (struct rxrpc_key_token **)&prep->payload[0]; + pp = (struct rxrpc_key_token **)&prep->payload.data[0]; while (*pp) pp = &(*pp)->next; *pp = token; @@ -814,7 +814,7 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token) */ static void rxrpc_free_preparse(struct key_preparsed_payload *prep) { - rxrpc_free_token_list(prep->payload[0]); + rxrpc_free_token_list(prep->payload.data[0]); } /* @@ -831,7 +831,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) if (prep->datalen != 8) return -EINVAL; - memcpy(&prep->type_data, prep->data, 8); + memcpy(&prep->payload.data[2], prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -842,7 +842,7 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); - prep->payload[0] = ci; + prep->payload.data[0] = ci; _leave(" = 0"); 
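The rxrpc key hunks all follow one migration: the single key->payload.data pointer plus the separate type_data words become one indexed array, with rxrpc keeping its token list in payload.data[0], the token count in payload.data[1], and the raw session-key bytes in payload.data[2]. A self-contained sketch of that slot convention, assuming this layout; example_payload, example_token and attach_token are invented names, not the kernel key API:

#include <stdio.h>

struct example_token {
        int security_index;
        struct example_token *next;
};

struct example_payload {
        /* slot 0: token list, slot 1: token count, slot 2: key material */
        void *data[4];
};

static void attach_token(struct example_payload *p, struct example_token *tok)
{
        struct example_token **pp = (struct example_token **)&p->data[0];

        while (*pp) /* walk to the tail of the singly linked list */
                pp = &(*pp)->next;
        *pp = tok;
        /* slot 1 smuggles a counter through a pointer, as rxrpc does */
        p->data[1] = (void *)((unsigned long)p->data[1] + 1);
}

int main(void)
{
        struct example_payload p = { { NULL } };
        struct example_token t = { .security_index = 2, .next = NULL };

        attach_token(&p, &t);
        printf("tokens attached: %lu\n", (unsigned long)p.data[1]);
        return 0;
}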
return 0; } @@ -852,8 +852,8 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep) */ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) { - if (prep->payload[0]) - crypto_free_blkcipher(prep->payload[0]); + if (prep->payload.data[0]) + crypto_free_blkcipher(prep->payload.data[0]); } /* @@ -861,7 +861,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep) */ static void rxrpc_destroy(struct key *key) { - rxrpc_free_token_list(key->payload.data); + rxrpc_free_token_list(key->payload.data[0]); } /* @@ -869,9 +869,9 @@ static void rxrpc_destroy(struct key *key) */ static void rxrpc_destroy_s(struct key *key) { - if (key->payload.data) { - crypto_free_blkcipher(key->payload.data); - key->payload.data = NULL; + if (key->payload.data[0]) { + crypto_free_blkcipher(key->payload.data[0]); + key->payload.data[0] = NULL; } } @@ -1070,7 +1070,7 @@ static long rxrpc_read(const struct key *key, size += 1 * 4; /* token count */ ntoks = 0; - for (token = key->payload.data; token; token = token->next) { + for (token = key->payload.data[0]; token; token = token->next) { toksize = 4; /* sec index */ switch (token->security_index) { @@ -1163,7 +1163,7 @@ static long rxrpc_read(const struct key *key, ENCODE(ntoks); tok = 0; - for (token = key->payload.data; token; token = token->next) { + for (token = key->payload.data[0]; token; token = token->next) { toksize = toksizes[tok++]; ENCODE(toksize); oldxdr = xdr; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index c0042807bfc6..a40d3afe93b7 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -158,7 +158,7 @@ int rxrpc_client_sendmsg(struct rxrpc_sock *rx, struct rxrpc_transport *trans, service_id = htons(srx->srx_service); } key = rx->key; - if (key && !rx->key->payload.data) + if (key && !rx->key->payload.data[0]) key = NULL; bundle = rxrpc_get_bundle(rx, trans, key, service_id, GFP_KERNEL); diff --git a/net/rxrpc/ar-security.c b/net/rxrpc/ar-security.c index 49b3cc31ee1f..8334474eb26c 100644 --- a/net/rxrpc/ar-security.c +++ b/net/rxrpc/ar-security.c @@ -137,9 +137,9 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) if (ret < 0) return ret; - if (!key->payload.data) + token = key->payload.data[0]; + if (!token) return -EKEYREJECTED; - token = key->payload.data; sec = rxrpc_security_lookup(token->security_index); if (!sec) diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c index 1976dec84f29..9946467f16b4 100644 --- a/net/rxrpc/ar-transport.c +++ b/net/rxrpc/ar-transport.c @@ -189,7 +189,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans) ASSERTCMP(atomic_read(&trans->usage), >, 0); - trans->put_time = get_seconds(); + trans->put_time = ktime_get_seconds(); if (unlikely(atomic_dec_and_test(&trans->usage))) { _debug("zombie"); /* let the reaper determine the timeout to avoid a race with @@ -226,7 +226,7 @@ static void rxrpc_transport_reaper(struct work_struct *work) _enter(""); - now = get_seconds(); + now = ktime_get_seconds(); earliest = ULONG_MAX; /* extract all the transports that have been dead too long */ diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index f226709ebd8f..d7a9ab5a9d9c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -67,7 +67,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key)); - token = conn->key->payload.data; + token = conn->key->payload.data[0]; conn->security_ix = token->security_index; ci = 
crypto_alloc_blkcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); @@ -125,7 +125,7 @@ static void rxkad_prime_packet_security(struct rxrpc_connection *conn) if (!conn->key) return; - token = conn->key->payload.data; + token = conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); desc.tfm = conn->cipher; @@ -221,7 +221,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, rxkhdr.checksum = 0; /* encrypt from the session key */ - token = call->conn->key->payload.data; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); desc.tfm = call->conn->cipher; desc.info = iv.x; @@ -433,7 +433,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, skb_to_sgvec(skb, sg, 0, skb->len); /* decrypt from the session key */ - token = call->conn->key->payload.data; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); desc.tfm = call->conn->cipher; desc.info = iv.x; @@ -780,7 +780,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, if (conn->security_level < min_level) goto protocol_error; - token = conn->key->payload.data; + token = conn->key->payload.data[0]; /* build the response packet */ memset(&resp, 0, sizeof(resp)); @@ -848,12 +848,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, } } - ASSERT(conn->server_key->payload.data != NULL); + ASSERT(conn->server_key->payload.data[0] != NULL); ASSERTCMP((unsigned long) ticket & 7UL, ==, 0); - memcpy(&iv, &conn->server_key->type_data, sizeof(iv)); + memcpy(&iv, &conn->server_key->payload.data[2], sizeof(iv)); - desc.tfm = conn->server_key->payload.data; + desc.tfm = conn->server_key->payload.data[0]; desc.info = iv.x; desc.flags = 0; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 559bfa011bda..0bc6f912f870 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, case TC_ACT_PIPE: case TC_ACT_RECLASSIFY: case TC_ACT_OK: + case TC_ACT_REDIRECT: action = filter_res; break; case TC_ACT_SHOT: diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5019a47b9270..bb41699c6c49 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -68,13 +68,13 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, } if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - proto, &tuple)) + proto, ca->net, &tuple)) goto out; zone.id = ca->zone; zone.dir = NF_CT_DEFAULT_ZONE_DIR; - thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple); + thash = nf_conntrack_find_get(ca->net, &zone, &tuple); if (!thash) goto out; @@ -119,6 +119,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci = to_connmark(a); ci->tcf_action = parm->action; + ci->net = net; ci->zone = parm->zone; tcf_hash_insert(a); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 99c9cc1c7af9..d05869646515 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, * worry later - danger - this API seems to have changed * from earlier kernels */ + par.net = dev_net(skb->dev); par.in = skb->dev; par.out = NULL; par.hooknum = ipt->tcfi_hook; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index e5168f8b9640..5faaa5425f7b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -38,6 +38,7 @@ struct cls_bpf_prog { struct bpf_prog *filter; struct list_head link; struct 
tcf_result res; + bool exts_integrated; struct tcf_exts exts; u32 handle; union { @@ -52,6 +53,7 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, @@ -59,6 +61,20 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, }; +static int cls_bpf_exec_opcode(int code) +{ + switch (code) { + case TC_ACT_OK: + case TC_ACT_SHOT: + case TC_ACT_STOLEN: + case TC_ACT_REDIRECT: + case TC_ACT_UNSPEC: + return code; + default: + return TC_ACT_UNSPEC; + } +} + static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { @@ -79,6 +95,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, list_for_each_entry_rcu(prog, &head->plist, link) { int filter_res; + qdisc_skb_cb(skb)->tc_classid = prog->res.classid; + if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); @@ -88,6 +106,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, filter_res = BPF_PROG_RUN(prog->filter, skb); } + if (prog->exts_integrated) { + res->class = prog->res.class; + res->classid = qdisc_skb_cb(skb)->tc_classid; + + ret = cls_bpf_exec_opcode(filter_res); + if (ret == TC_ACT_UNSPEC) + continue; + break; + } + if (filter_res == 0) continue; @@ -195,8 +223,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) return ret; } -static int cls_bpf_prog_from_ops(struct nlattr **tb, - struct cls_bpf_prog *prog, u32 classid) +static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) { struct sock_filter *bpf_ops; struct sock_fprog_kern fprog_tmp; @@ -230,15 +257,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, prog->bpf_ops = bpf_ops; prog->bpf_num_ops = bpf_num_ops; prog->bpf_name = NULL; - prog->filter = fp; - prog->res.classid = classid; return 0; } -static int cls_bpf_prog_from_efd(struct nlattr **tb, - struct cls_bpf_prog *prog, u32 classid) +static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, + const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; @@ -268,9 +293,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, prog->bpf_ops = NULL; prog->bpf_fd = bpf_fd; prog->bpf_name = name; - prog->filter = fp; - prog->res.classid = classid; + + if (fp->dst_needed) + netif_keep_dst(qdisc_dev(tp->q)); return 0; } @@ -280,16 +306,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { + bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; - bool is_bpf, is_ebpf; - u32 classid; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; is_ebpf = tb[TCA_BPF_FD]; - - if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) || - !tb[TCA_BPF_CLASSID]) + if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); @@ -297,18 +320,32 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if (ret < 0) return ret; - classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + if (tb[TCA_BPF_FLAGS]) { + u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); + + if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { + tcf_exts_destroy(&exts); + return -EINVAL; 
+ } + + have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; + } + + prog->exts_integrated = have_exts; - ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) : - cls_bpf_prog_from_efd(tb, prog, classid); + ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : + cls_bpf_prog_from_efd(tb, prog, tp); if (ret < 0) { tcf_exts_destroy(&exts); return ret; } - tcf_bind_filter(tp, &prog->res, base); - tcf_exts_change(tp, &prog->exts, &exts); + if (tb[TCA_BPF_CLASSID]) { + prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); + tcf_bind_filter(tp, &prog->res, base); + } + tcf_exts_change(tp, &prog->exts, &exts); return 0; } @@ -429,6 +466,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; struct nlattr *nest; + u32 bpf_flags = 0; int ret; if (prog == NULL) @@ -440,7 +478,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nest == NULL) goto nla_put_failure; - if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) + if (prog->res.classid && + nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid)) goto nla_put_failure; if (cls_bpf_is_ebpf(prog)) @@ -453,6 +492,11 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (tcf_exts_dump(skb, &prog->exts) < 0) goto nla_put_failure; + if (prog->exts_integrated) + bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; + if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) + goto nla_put_failure; + nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &prog->exts) < 0) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 536838b657bf..fbfec6a18839 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -22,6 +22,7 @@ #include <linux/if_vlan.h> #include <linux/slab.h> #include <linux/module.h> +#include <net/inet_sock.h> #include <net/pkt_cls.h> #include <net/ip.h> @@ -197,8 +198,11 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { - kuid_t skuid = skb->sk->sk_socket->file->f_cred->fsuid; + struct sock *sk = skb_to_full_sk(skb); + + if (sk && sk->sk_socket && sk->sk_socket->file) { + kuid_t skuid = sk->sk_socket->file->f_cred->fsuid; + return from_kuid(&init_user_ns, skuid); } return 0; @@ -206,8 +210,11 @@ static u32 flow_get_skuid(const struct sk_buff *skb) static u32 flow_get_skgid(const struct sk_buff *skb) { - if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) { - kgid_t skgid = skb->sk->sk_socket->file->f_cred->fsgid; + struct sock *sk = skb_to_full_sk(skb); + + if (sk && sk->sk_socket && sk->sk_socket->file) { + kgid_t skgid = sk->sk_socket->file->f_cred->fsgid; + return from_kgid(&init_user_ns, skgid); } return 0; diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index df0328ba6a48..c66ca9400ab4 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -95,6 +95,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); + acpar.net = em->net; acpar.in = indev ? 
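Annotating the cls_bpf hunks above: with TCA_BPF_FLAG_ACT_DIRECT set, the eBPF program itself produces the verdict that cls_bpf_exec_opcode() filters, and hands the class id over through the new qdisc_skb_cb(skb)->tc_classid field, so no separate tc action needs to be attached. A minimal sketch of such a direct-action classifier in restricted-C eBPF; the section name follows tc's loader convention and the classid value is purely illustrative:

#include <linux/bpf.h>
#include <linux/pkt_cls.h>

__attribute__((section("classifier"), used))
int cls_direct_sketch(struct __sk_buff *skb)
{
	/* visible to cls_bpf_classify(), which copies it into
	 * res->classid when exts_integrated is set
	 */
	skb->tc_classid = 0x00010010;	/* e.g. class 1:10 */

	/* any opcode outside cls_bpf_exec_opcode()'s switch is
	 * mapped to TC_ACT_UNSPEC and falls through to the next
	 * program in the list
	 */
	return TC_ACT_OK;
}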
indev : dev; acpar.out = dev; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index b5294ce20cd4..f2aabc0089da 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -343,119 +343,145 @@ META_COLLECTOR(int_sk_refcnt) META_COLLECTOR(int_sk_rcvbuf) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_rcvbuf; + dst->value = sk->sk_rcvbuf; } META_COLLECTOR(int_sk_shutdown) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_shutdown; + dst->value = sk->sk_shutdown; } META_COLLECTOR(int_sk_proto) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_protocol; + dst->value = sk->sk_protocol; } META_COLLECTOR(int_sk_type) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_type; + dst->value = sk->sk_type; } META_COLLECTOR(int_sk_rmem_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = sk_rmem_alloc_get(skb->sk); + dst->value = sk_rmem_alloc_get(sk); } META_COLLECTOR(int_sk_wmem_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = sk_wmem_alloc_get(skb->sk); + dst->value = sk_wmem_alloc_get(sk); } META_COLLECTOR(int_sk_omem_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = atomic_read(&skb->sk->sk_omem_alloc); + dst->value = atomic_read(&sk->sk_omem_alloc); } META_COLLECTOR(int_sk_rcv_qlen) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_receive_queue.qlen; + dst->value = sk->sk_receive_queue.qlen; } META_COLLECTOR(int_sk_snd_qlen) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_write_queue.qlen; + dst->value = sk->sk_write_queue.qlen; } META_COLLECTOR(int_sk_wmem_queued) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_wmem_queued; + dst->value = sk->sk_wmem_queued; } META_COLLECTOR(int_sk_fwd_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_forward_alloc; + dst->value = sk->sk_forward_alloc; } META_COLLECTOR(int_sk_sndbuf) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_sndbuf; + dst->value = sk->sk_sndbuf; } META_COLLECTOR(int_sk_alloc) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = (__force int) skb->sk->sk_allocation; + dst->value = (__force int) sk->sk_allocation; } META_COLLECTOR(int_sk_hash) @@ -469,92 +495,112 @@ META_COLLECTOR(int_sk_hash) META_COLLECTOR(int_sk_lingertime) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_lingertime / HZ; + dst->value = sk->sk_lingertime / HZ; } META_COLLECTOR(int_sk_err_qlen) { - if (skip_nonlocal(skb)) { + const struct sock 
*sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_error_queue.qlen; + dst->value = sk->sk_error_queue.qlen; } META_COLLECTOR(int_sk_ack_bl) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_ack_backlog; + dst->value = sk->sk_ack_backlog; } META_COLLECTOR(int_sk_max_ack_bl) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_max_ack_backlog; + dst->value = sk->sk_max_ack_backlog; } META_COLLECTOR(int_sk_prio) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_priority; + dst->value = sk->sk_priority; } META_COLLECTOR(int_sk_rcvlowat) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_rcvlowat; + dst->value = sk->sk_rcvlowat; } META_COLLECTOR(int_sk_rcvtimeo) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_rcvtimeo / HZ; + dst->value = sk->sk_rcvtimeo / HZ; } META_COLLECTOR(int_sk_sndtimeo) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_sndtimeo / HZ; + dst->value = sk->sk_sndtimeo / HZ; } META_COLLECTOR(int_sk_sendmsg_off) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_frag.offset; + dst->value = sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) { - if (skip_nonlocal(skb)) { + const struct sock *sk = skb_to_full_sk(skb); + + if (!sk) { *err = -1; return; } - dst->value = skb->sk->sk_write_pending; + dst->value = sk->sk_write_pending; } /************************************************************************** diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index 094a874b48bc..3fee70d9814f 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -11,7 +11,7 @@ * Note: Quantum tunneling is not supported. 
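The em_meta (and cls_flow) conversion above repeats one idiom: fetch a full socket or bail out. A sketch of what skb_to_full_sk() is expected to do, modeled on the helper these call sites rely on; the exact definition is an assumption here:

static inline struct sock *skb_to_full_sk_sketch(const struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	/* SYNACKs can now carry a TCP_NEW_SYN_RECV request socket,
	 * which lacks sk_rcvbuf, sk_priority and friends; report the
	 * owning listener instead of dereferencing the mini socket.
	 */
	if (sk && sk->sk_state == TCP_NEW_SYN_RECV)
		sk = inet_reqsk(sk)->rsk_listener;
	return sk;
}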
*/ -#include <linux/module.h> +#include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/skbuff.h> @@ -37,17 +37,8 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = { .owner = THIS_MODULE, }; -static int __init blackhole_module_init(void) +static int __init blackhole_init(void) { return register_qdisc(&blackhole_qdisc_ops); } - -static void __exit blackhole_module_exit(void) -{ - unregister_qdisc(&blackhole_qdisc_ops); -} - -module_init(blackhole_module_init) -module_exit(blackhole_module_exit) - -MODULE_LICENSE("GPL"); +device_initcall(blackhole_init) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 02bfd3d1c4f0..5ffb8b8337c7 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -553,65 +553,6 @@ static void choke_destroy(struct Qdisc *sch) choke_free(q->tab); } -static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg) -{ - return NULL; -} - -static unsigned long choke_get(struct Qdisc *sch, u32 classid) -{ - return 0; -} - -static void choke_put(struct Qdisc *q, unsigned long cl) -{ -} - -static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, - u32 classid) -{ - return 0; -} - -static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch, - unsigned long cl) -{ - struct choke_sched_data *q = qdisc_priv(sch); - - if (cl) - return NULL; - return &q->filter_list; -} - -static int choke_dump_class(struct Qdisc *sch, unsigned long cl, - struct sk_buff *skb, struct tcmsg *tcm) -{ - tcm->tcm_handle |= TC_H_MIN(cl); - return 0; -} - -static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg) -{ - if (!arg->stop) { - if (arg->fn(sch, 1, arg) < 0) { - arg->stop = 1; - return; - } - arg->count++; - } -} - -static const struct Qdisc_class_ops choke_class_ops = { - .leaf = choke_leaf, - .get = choke_get, - .put = choke_put, - .tcf_chain = choke_find_tcf, - .bind_tcf = choke_bind, - .unbind_tcf = choke_put, - .dump = choke_dump_class, - .walk = choke_walk, -}; - static struct sk_buff *choke_peek_head(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index c4d45fd8c551..f357f34d02d2 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -35,14 +35,20 @@ #define NO_DEFAULT_INDEX (1 << 16) +struct mask_value { + u8 mask; + u8 value; +}; + struct dsmark_qdisc_data { struct Qdisc *q; struct tcf_proto __rcu *filter_list; - u8 *mask; /* "owns" the array */ - u8 *value; + struct mask_value *mv; u16 indices; + u8 set_tc_index; u32 default_index; /* index range is 0...0xffff */ - int set_tc_index; +#define DSMARK_EMBEDDED_SZ 16 + struct mask_value embedded[DSMARK_EMBEDDED_SZ]; }; static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index) @@ -116,7 +122,6 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_DSMARK_MAX + 1]; int err = -EINVAL; - u8 mask = 0; pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n", __func__, sch, p, classid, parent, *arg); @@ -133,14 +138,11 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, if (err < 0) goto errout; - if (tb[TCA_DSMARK_MASK]) - mask = nla_get_u8(tb[TCA_DSMARK_MASK]); - if (tb[TCA_DSMARK_VALUE]) - p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); + p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]); if (tb[TCA_DSMARK_MASK]) - p->mask[*arg - 1] = mask; + p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]); err = 0; @@ 
-155,8 +157,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg) if (!dsmark_valid_index(p, arg)) return -EINVAL; - p->mask[arg - 1] = 0xff; - p->value[arg - 1] = 0; + p->mv[arg - 1].mask = 0xff; + p->mv[arg - 1].value = 0; return 0; } @@ -173,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) return; for (i = 0; i < p->indices; i++) { - if (p->mask[i] == 0xff && !p->value[i]) + if (p->mv[i].mask == 0xff && !p->mv[i].value) goto ignore; if (walker->count >= walker->skip) { if (walker->fn(sch, i + 1, walker) < 0) { @@ -291,12 +293,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): - ipv4_change_dsfield(ip_hdr(skb), p->mask[index], - p->value[index]); + ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask, + p->mv[index].value); break; case htons(ETH_P_IPV6): - ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], - p->value[index]); + ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask, + p->mv[index].value); break; default: /* @@ -304,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) * This way, we can send non-IP traffic through dsmark * and don't need yet another qdisc as a bypass. */ - if (p->mask[index] != 0xff || p->value[index]) + if (p->mv[index].mask != 0xff || p->mv[index].value) pr_warn("%s: unsupported protocol %d\n", __func__, ntohs(tc_skb_protocol(skb))); break; @@ -346,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) int err = -EINVAL; u32 default_index = NO_DEFAULT_INDEX; u16 indices; - u8 *mask; + int i; pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt); @@ -366,18 +368,18 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_DSMARK_DEFAULT_INDEX]) default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]); - mask = kmalloc(indices * 2, GFP_KERNEL); - if (mask == NULL) { + if (indices <= DSMARK_EMBEDDED_SZ) + p->mv = p->embedded; + else + p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL); + if (!p->mv) { err = -ENOMEM; goto errout; } - - p->mask = mask; - memset(p->mask, 0xff, indices); - - p->value = p->mask + indices; - memset(p->value, 0, indices); - + for (i = 0; i < indices; i++) { + p->mv[i].mask = 0xff; + p->mv[i].value = 0; + } p->indices = indices; p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); @@ -410,7 +412,8 @@ static void dsmark_destroy(struct Qdisc *sch) tcf_destroy_chain(&p->filter_list); qdisc_destroy(p->q); - kfree(p->mask); + if (p->mv != p->embedded) + kfree(p->mv); } static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, @@ -430,8 +433,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; - if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]) || - nla_put_u8(skb, TCA_DSMARK_VALUE, p->value[cl - 1])) + if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) || + nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value)) goto nla_put_failure; return nla_nest_end(skb, opts); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f377702d4b91..109b2322778f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -224,13 +224,16 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; - /* SYNACK messages are attached to a listener socket. 
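One line summarizes the sch_dsmark hunks above: per packet, the dsfield helpers apply a masked OR, which is why the mask = 0xff, value = 0 pair written by dsmark_delete() is the identity transform that dsmark_walk() and the non-IP warning in dsmark_dequeue() treat as "unused class". Sketch:

static inline u8 dsmark_remark_sketch(u8 dsfield, const struct mask_value *mv)
{
	/* what ipv4_change_dsfield()/ipv6_change_dsfield() compute */
	return (dsfield & mv->mask) | mv->value;	/* 0xff/0x00: no-op */
}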
- * 1) They are not part of a 'flow' yet - * 2) We do not want to rate limit them (eg SYNFLOOD attack), + /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket + * or a listener (SYNCOOKIE mode) + * 1) request sockets are not full blown, + * they do not contain sk_pacing_rate + * 2) They are not part of a 'flow' yet + * 3) We do not want to rate limit them (eg SYNFLOOD attack), * especially if the listener set SO_MAX_PACING_RATE - * 3) We pretend they are orphaned + * 4) We pretend they are orphaned */ - if (!sk || sk->sk_state == TCP_LISTEN) { + if (!sk || sk_listener(sk)) { unsigned long hash = skb_get_hash(skb) & q->orphan_mask; /* By forcing low order bit to 1, we make sure to not diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b00f1f9611d6..559afd0ee7de 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1590,7 +1590,7 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, /* Set an association id for a given association */ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp) { - bool preload = !!(gfp & __GFP_WAIT); + bool preload = gfpflags_allow_blocking(gfp); int ret; /* If the id is already assigned, keep it. */ diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7954c52e1794..763e06a55155 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2494,7 +2494,7 @@ static int sctp_process_param(struct sctp_association *asoc, __u16 sat; int retval = 1; sctp_scope_t scope; - time_t stale; + u32 stale; struct sctp_af *af; union sctp_addr_param *addr_param; struct sctp_transport *t; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d7eaa7354cf7..6f46aa16cb76 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2306,7 +2306,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - time_t stale; + u32 stale; sctp_cookie_preserve_param_t bht; sctp_errhdr_t *err; struct sctp_chunk *reply; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 17bef01b9aa3..897c01c029ca 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4475,7 +4475,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval } newfile = sock_alloc_file(newsock, 0, NULL); - if (unlikely(IS_ERR(newfile))) { + if (IS_ERR(newfile)) { put_unused_fd(retval); sock_release(newsock); return PTR_ERR(newfile); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index a0a431824f63..aab9e3f29755 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -331,7 +331,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * 1/8, rto_alpha would be expressed as 3. 
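The sch_fq change above folds both cases of its rewritten comment into sk_listener(). A sketch of that predicate as presumably defined in the socket headers for this series (assumption):

static inline bool sk_listener_sketch(const struct sock *sk)
{
	/* true for full listeners and for the lightweight
	 * TCP_NEW_SYN_RECV request sockets that SYNACKs now ride on
	 */
	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
}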
*/ tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + (((__u32)abs64((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) + (rtt >> net->sctp.rto_alpha); } else { diff --git a/net/socket.c b/net/socket.c index 9963a0b53a64..dd2c247c99e3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -373,7 +373,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); - if (unlikely(IS_ERR(file))) { + if (IS_ERR(file)) { /* drop dentry, keep inode */ ihold(d_inode(path.dentry)); path_put(&path); @@ -1303,7 +1303,7 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, } newfile1 = sock_alloc_file(sock1, flags, NULL); - if (unlikely(IS_ERR(newfile1))) { + if (IS_ERR(newfile1)) { err = PTR_ERR(newfile1); goto out_put_unused_both; } @@ -1467,7 +1467,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out_put; } newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); - if (unlikely(IS_ERR(newfile))) { + if (IS_ERR(newfile)) { err = PTR_ERR(newfile); put_unused_fd(newfd); sock_release(newsock); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dace13d7638e..799e65b944b9 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1411,17 +1411,16 @@ gss_key_timeout(struct rpc_cred *rc) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; - unsigned long now = jiffies; - unsigned long expire; + unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ); + int ret = 0; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); - if (ctx) - expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ); + if (!ctx || time_after(timeout, ctx->gc_expiry)) + ret = -EACCES; rcu_read_unlock(); - if (!ctx || time_after(now, expire)) - return -EACCES; - return 0; + + return ret; } static int diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 6255d141133b..229956bf8457 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -138,6 +138,14 @@ out_free: */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { + if (!xprt->ops->bc_setup) + return 0; + return xprt->ops->bc_setup(xprt, min_reqs); +} +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); + +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) +{ struct rpc_rqst *req; struct list_head tmp_list; int i; @@ -192,7 +200,6 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -ENOMEM; } -EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /** * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. @@ -205,6 +212,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel); */ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) { + if (xprt->ops->bc_destroy) + xprt->ops->bc_destroy(xprt, max_reqs); +} +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); + +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) +{ struct rpc_rqst *req = NULL, *tmp = NULL; dprintk("RPC: destroy backchannel transport\n"); @@ -227,7 +241,6 @@ out: dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? 
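The sctp_transport_update_rto() hunk above relies on abs() accepting 64-bit arguments by now, replacing abs64(); the arithmetic is the usual RFC 2960/RFC 6298 smoothing, where rto_alpha = 3 and rto_beta = 2 stand for alpha = 1/8 and beta = 1/4. A standalone userspace sketch of the same update for clarity:

#include <stdint.h>
#include <stdlib.h>

#define RTO_ALPHA 3	/* alpha = 1/8 -> shift right by 3 */
#define RTO_BETA  2	/* beta  = 1/4 -> shift right by 2 */

static void rto_smooth_sketch(uint32_t *srtt, uint32_t *rttvar, uint32_t rtt)
{
	/* RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - RTT| */
	*rttvar = *rttvar - (*rttvar >> RTO_BETA) +
		  ((uint32_t)llabs((int64_t)*srtt - (int64_t)rtt) >> RTO_BETA);
	/* SRTT <- (1 - alpha) * SRTT + alpha * RTT */
	*srtt = *srtt - (*srtt >> RTO_ALPHA) + (rtt >> RTO_ALPHA);
}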
"true" : "false"); } -EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) { @@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; + xprt->ops->bc_free_rqst(req); +} + +void xprt_free_bc_rqst(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + dprintk("RPC: free backchannel req=%p\n", req); req->rq_connect_cookie = xprt->connect_cookie - 1; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4a2340a54401..5e4f815c2b34 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -41,13 +41,16 @@ static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); -static void cache_init(struct cache_head *h) +static void cache_init(struct cache_head *h, struct cache_detail *detail) { time_t now = seconds_since_boot(); INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; + if (now <= detail->flush_time) + /* ensure it isn't already expired */ + now = detail->flush_time + 1; h->last_refresh = now; } @@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, * we might get lose if we need to * cache_put it soon. */ - cache_init(new); + cache_init(new, detail); detail->init(new, key); write_lock(&detail->hash_lock); @@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup); static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); -static void cache_fresh_locked(struct cache_head *head, time_t expiry) +static void cache_fresh_locked(struct cache_head *head, time_t expiry, + struct cache_detail *detail) { + time_t now = seconds_since_boot(); + if (now <= detail->flush_time) + /* ensure it isn't immediately treated as expired */ + now = detail->flush_time + 1; head->expiry_time = expiry; - head->last_refresh = seconds_since_boot(); + head->last_refresh = now; smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } @@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, set_bit(CACHE_NEGATIVE, &old->flags); else detail->update(old, new); - cache_fresh_locked(old, new->expiry_time); + cache_fresh_locked(old, new->expiry_time, detail); write_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; @@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, cache_put(old, detail); return NULL; } - cache_init(tmp); + cache_init(tmp, detail); detail->init(tmp, old); write_lock(&detail->hash_lock); @@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); - cache_fresh_locked(tmp, new->expiry_time); - cache_fresh_locked(old, 0); + cache_fresh_locked(tmp, new->expiry_time, detail); + cache_fresh_locked(old, 0, detail); write_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); @@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h rv = cache_is_valid(h); if (rv == -EAGAIN) { set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, + detail); rv = -ENOENT; } write_unlock(&detail->hash_lock); @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush); void 
cache_purge(struct cache_detail *detail) { - detail->flush_time = LONG_MAX; + time_t now = seconds_since_boot(); + if (detail->flush_time >= now) + now = detail->flush_time + 1; + /* 'now' is the maximum value any 'last_refresh' can have */ + detail->flush_time = now; detail->nextcheck = seconds_since_boot(); cache_flush(); - detail->flush_time = 1; } EXPORT_SYMBOL_GPL(cache_purge); @@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, { char tbuf[20]; char *bp, *ep; + time_t then, now; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf, return -EINVAL; bp = tbuf; - cd->flush_time = get_expiry(&bp); - cd->nextcheck = seconds_since_boot(); + then = get_expiry(&bp); + now = seconds_since_boot(); + cd->nextcheck = now; + /* Can only set flush_time to 1 second beyond "now", or + * possibly 1 second beyond flushtime. This is because + * flush_time never goes backwards so it mustn't get too far + * ahead of time. + */ + if (then >= now) { + /* Want to flush everything, so behave like cache_purge() */ + if (cd->flush_time >= now) + now = cd->flush_time + 1; + then = now; + } + + cd->flush_time = then; cache_flush(); *ppos += count; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index a8f579df14d8..bc5b7b5032ca 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1367,11 +1367,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, /* reset result send buffer "put" position */ resv->iov_len = 0; - if (rqstp->rq_prot != IPPROTO_TCP) { - printk(KERN_ERR "No support for Non-TCP transports!\n"); - BUG(); - } - /* * Skip the next two words because they've already been * processed in the transport diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0c8120229a03..1413cdcc131c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -181,7 +181,7 @@ int svc_send_common(struct socket *sock, struct xdr_buf *xdr, struct page **ppage = xdr->pages; size_t base = xdr->page_base; unsigned int pglen = xdr->page_len; - unsigned int flags = MSG_MORE; + unsigned int flags = MSG_MORE | MSG_SENDPAGE_NOTLAST; int slen; int len = 0; @@ -399,6 +399,31 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp) return svc_port_is_privileged(svc_addr(rqstp)); } +static bool sunrpc_waitqueue_active(wait_queue_head_t *wq) +{ + if (!wq) + return false; + /* + * There should normally be a memory barrier here--see + * wq_has_sleeper(). + * + * It appears that isn't currently necessary, though, basically + * because callers all appear to have sufficient memory barriers + * between the time the relevant change is made and the + * time they call these callbacks. + * + * The nfsd code itself doesn't actually explicitly wait on + * these waitqueues, but it may wait on them for example in + * sendpage() or sendmsg() calls. (And those may be the only + * places, since it uses nonblocking reads.) + * + * Maybe we should add the memory barriers anyway, but these are + * hot paths so we'd need to be convinced there's no significant + * penalty. + */ + return waitqueue_active(wq); +} + /* * INET callback when data has been received on the socket.
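All the last_refresh/flush_time clamping above defends a single comparison. A sketch of the expiry test, modeled on cache_is_expired() in the same file; the exact operator is an assumption:

static bool cache_is_expired_sketch(struct cache_detail *detail,
				    struct cache_head *h)
{
	/* flush_time only ever moves forward, and live entries keep
	 * last_refresh strictly ahead of it, so flushing everything
	 * reduces to "flush_time = now", as cache_purge() now does.
	 */
	return (h->expiry_time < seconds_since_boot()) ||
	       (detail->flush_time >= h->last_refresh);
}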
*/ @@ -414,7 +439,7 @@ static void svc_udp_data_ready(struct sock *sk) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible(wq); } @@ -432,7 +457,7 @@ static void svc_write_space(struct sock *sk) svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) { + if (sunrpc_waitqueue_active(wq)) { dprintk("RPC svc_write_space: someone sleeping on %p\n", svsk); wake_up_interruptible(wq); @@ -787,7 +812,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk) } wq = sk_sleep(sk); - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible_all(wq); } @@ -808,7 +833,7 @@ static void svc_tcp_state_change(struct sock *sk) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible_all(wq); } @@ -823,7 +848,7 @@ static void svc_tcp_data_ready(struct sock *sk) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); } - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible(wq); } @@ -1367,7 +1392,6 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs); /* * Initialize socket for RPC use and create svc_sock struct - * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. */ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct socket *sock, @@ -1594,7 +1618,7 @@ static void svc_sock_detach(struct svc_xprt *xprt) sk->sk_write_space = svsk->sk_owspace; wq = sk_sleep(sk); - if (wq && waitqueue_active(wq)) + if (sunrpc_waitqueue_active(wq)) wake_up_interruptible(wq); } diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 887f0183b4c6..c88d9bc06f5c 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -76,7 +76,7 @@ static int proc_dodebug(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - char tmpbuf[20], c, *s; + char tmpbuf[20], c, *s = NULL; char __user *p; unsigned int value; size_t left, len; @@ -103,23 +103,24 @@ proc_dodebug(struct ctl_table *table, int write, return -EFAULT; tmpbuf[left] = '\0'; - for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--) - value = 10 * value + (*s - '0'); - if (*s && !isspace(*s)) - return -EINVAL; - while (left && isspace(*s)) - left--, s++; + value = simple_strtol(tmpbuf, &s, 0); + if (s) { + left -= (s - tmpbuf); + if (left && !isspace(*s)) + return -EINVAL; + while (left && isspace(*s)) + left--, s++; + } else + left = 0; *(unsigned int *) table->data = value; /* Display the RPC tasks on writing to rpc_debug */ if (strcmp(table->procname, "rpc_debug") == 0) rpc_show_tasks(&init_net); } else { - if (!access_ok(VERIFY_WRITE, buffer, left)) - return -EFAULT; - len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data); + len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data); if (len > left) len = left; - if (__copy_to_user(buffer, tmpbuf, len)) + if (copy_to_user(buffer, tmpbuf, len)) return -EFAULT; if ((left -= len) > 0) { if (put_user('\n', (char __user *)buffer + len)) diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 48913de240bd..33f99d3004f2 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -5,3 +5,4 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \ svc_rdma.o svc_rdma_transport.o \ svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ module.o +rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += 
backchannel.o diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c new file mode 100644 index 000000000000..2dcb44f69e53 --- /dev/null +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2015 Oracle. All rights reserved. + * + * Support for backward direction RPCs on RPC/RDMA. + */ + +#include <linux/module.h> +#include <linux/sunrpc/xprt.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svc_xprt.h> + +#include "xprt_rdma.h" + +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#define RPCRDMA_BACKCHANNEL_DEBUG + +static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, + struct rpc_rqst *rqst) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + + spin_lock(&buf->rb_reqslock); + list_del(&req->rl_all); + spin_unlock(&buf->rb_reqslock); + + rpcrdma_destroy_req(&r_xprt->rx_ia, req); + + kfree(rqst); +} + +static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, + struct rpc_rqst *rqst) +{ + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_regbuf *rb; + struct rpcrdma_req *req; + struct xdr_buf *buf; + size_t size; + + req = rpcrdma_create_req(r_xprt); + if (!req) + return -ENOMEM; + req->rl_backchannel = true; + + size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); + rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + if (IS_ERR(rb)) + goto out_fail; + req->rl_rdmabuf = rb; + + size += RPCRDMA_INLINE_READ_THRESHOLD(rqst); + rb = rpcrdma_alloc_regbuf(ia, size, GFP_KERNEL); + if (IS_ERR(rb)) + goto out_fail; + rb->rg_owner = req; + req->rl_sendbuf = rb; + /* so that rpcr_to_rdmar works when receiving a request */ + rqst->rq_buffer = (void *)req->rl_sendbuf->rg_base; + + buf = &rqst->rq_snd_buf; + buf->head[0].iov_base = rqst->rq_buffer; + buf->head[0].iov_len = 0; + buf->tail[0].iov_base = NULL; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->len = 0; + buf->buflen = size; + + return 0; + +out_fail: + rpcrdma_bc_free_rqst(r_xprt, rqst); + return -ENOMEM; +} + +/* Allocate and add receive buffers to the rpcrdma_buffer's + * existing list of rep's. These are released when the + * transport is destroyed. + */ +static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, + unsigned int count) +{ + struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; + struct rpcrdma_rep *rep; + unsigned long flags; + int rc = 0; + + while (count--) { + rep = rpcrdma_create_rep(r_xprt); + if (IS_ERR(rep)) { + pr_err("RPC: %s: reply buffer alloc failed\n", + __func__); + rc = PTR_ERR(rep); + break; + } + + spin_lock_irqsave(&buffers->rb_lock, flags); + list_add(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock_irqrestore(&buffers->rb_lock, flags); + } + + return rc; +} + +/** + * xprt_rdma_bc_setup - Pre-allocate resources for handling backchannel requests + * @xprt: transport associated with these backchannel resources + * @reqs: number of concurrent incoming requests to expect + * + * Returns 0 on success; otherwise a negative errno + */ +int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; + struct rpc_rqst *rqst; + unsigned int i; + int rc; + + /* The backchannel reply path returns each rpc_rqst to the + * bc_pa_list _after_ the reply is sent. If the server is + * faster than the client, it can send another backward + * direction request before the rpc_rqst is returned to the + * list. 
The client rejects the request in this case. + * + * Twice as many rpc_rqsts are prepared to ensure there is + * always an rpc_rqst available as soon as a reply is sent. + */ + if (reqs > RPCRDMA_BACKWARD_WRS >> 1) + goto out_err; + + for (i = 0; i < (reqs << 1); i++) { + rqst = kzalloc(sizeof(*rqst), GFP_KERNEL); + if (!rqst) { + pr_err("RPC: %s: Failed to create bc rpc_rqst\n", + __func__); + goto out_free; + } + + rqst->rq_xprt = &r_xprt->rx_xprt; + INIT_LIST_HEAD(&rqst->rq_list); + INIT_LIST_HEAD(&rqst->rq_bc_list); + + if (rpcrdma_bc_setup_rqst(r_xprt, rqst)) + goto out_free; + + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); + } + + rc = rpcrdma_bc_setup_reps(r_xprt, reqs); + if (rc) + goto out_free; + + rc = rpcrdma_ep_post_extra_recv(r_xprt, reqs); + if (rc) + goto out_free; + + buffer->rb_bc_srv_max_requests = reqs; + request_module("svcrdma"); + + return 0; + +out_free: + xprt_rdma_bc_destroy(xprt, reqs); + +out_err: + pr_err("RPC: %s: setup backchannel transport failed\n", __func__); + return -ENOMEM; +} + +/** + * xprt_rdma_bc_up - Create transport endpoint for backchannel service + * @serv: server endpoint + * @net: network namespace + * + * The "xprt" is an implied argument: it supplies the name of the + * backchannel transport class. + * + * Returns zero on success, negative errno on failure + */ +int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); + if (ret < 0) + return ret; + return 0; +} + +/** + * rpcrdma_bc_marshal_reply - Send backwards direction reply + * @rqst: buffer containing RPC reply data + * + * Returns zero on success. + */ +int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_xprt; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct rpcrdma_msg *headerp; + size_t rpclen; + + headerp = rdmab_to_msg(req->rl_rdmabuf); + headerp->rm_xid = rqst->rq_xid; + headerp->rm_vers = rpcrdma_version; + headerp->rm_credit = + cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); + headerp->rm_type = rdma_msg; + headerp->rm_body.rm_chunks[0] = xdr_zero; + headerp->rm_body.rm_chunks[1] = xdr_zero; + headerp->rm_body.rm_chunks[2] = xdr_zero; + + rpclen = rqst->rq_svec[0].iov_len; + + pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", + __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); + pr_info("RPC: %s: RPC/RDMA: %*ph\n", + __func__, (int)RPCRDMA_HDRLEN_MIN, headerp); + pr_info("RPC: %s: RPC: %*ph\n", + __func__, (int)rpclen, rqst->rq_svec[0].iov_base); + + req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); + req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; + req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); + + req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); + req->rl_send_iov[1].length = rpclen; + req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); + + req->rl_niovs = 2; + return 0; +} + +/** + * xprt_rdma_bc_destroy - Release resources for handling backchannel requests + * @xprt: transport associated with these backchannel resources + * @reqs: number of incoming requests to destroy; ignored + */ +void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs) +{ + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpc_rqst *rqst, *tmp; + + spin_lock_bh(&xprt->bc_pa_lock); + list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { + 
list_del(&rqst->rq_bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); + + rpcrdma_bc_free_rqst(r_xprt, rqst); + + spin_lock_bh(&xprt->bc_pa_lock); + } + spin_unlock_bh(&xprt->bc_pa_lock); +} + +/** + * xprt_rdma_bc_free_rqst - Release a backchannel rqst + * @rqst: request to release + */ +void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) +{ + struct rpc_xprt *xprt = rqst->rq_xprt; + + smp_mb__before_atomic(); + WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); + clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + smp_mb__after_atomic(); + + spin_lock_bh(&xprt->bc_pa_lock); + list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); +} + +/** + * rpcrdma_bc_receive_call - Handle a backward direction call + * @xprt: transport receiving the call + * @rep: receive buffer containing the call + * + * Called in the RPC reply handler, which runs in a tasklet. + * Be quick about it. + * + * Operational assumptions: + * o Backchannel credits are ignored, just as the NFS server + * forechannel currently does + * o The ULP manages a replay cache (eg, NFSv4.1 sessions). + * No replay detection is done at the transport level + */ +void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_rep *rep) +{ + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_msg *headerp; + struct svc_serv *bc_serv; + struct rpcrdma_req *req; + struct rpc_rqst *rqst; + struct xdr_buf *buf; + size_t size; + __be32 *p; + + headerp = rdmab_to_msg(rep->rr_rdmabuf); +#ifdef RPCRDMA_BACKCHANNEL_DEBUG + pr_info("RPC: %s: callback XID %08x, length=%u\n", + __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); + pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); +#endif + + /* Sanity check: + * Need at least enough bytes for RPC/RDMA header, as code + * here references the header fields by array offset. Also, + * backward calls are always inline, so ensure there + * are some bytes beyond the RPC/RDMA header. + */ + if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) + goto out_short; + p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); + size = rep->rr_len - RPCRDMA_HDRLEN_MIN; + + /* Grab a free bc rqst */ + spin_lock(&xprt->bc_pa_lock); + if (list_empty(&xprt->bc_pa_list)) { + spin_unlock(&xprt->bc_pa_lock); + goto out_overflow; + } + rqst = list_first_entry(&xprt->bc_pa_list, + struct rpc_rqst, rq_bc_pa_list); + list_del(&rqst->rq_bc_pa_list); + spin_unlock(&xprt->bc_pa_lock); +#ifdef RPCRDMA_BACKCHANNEL_DEBUG + pr_info("RPC: %s: using rqst %p\n", __func__, rqst); +#endif + + /* Prepare rqst */ + rqst->rq_reply_bytes_recvd = 0; + rqst->rq_bytes_sent = 0; + rqst->rq_xid = headerp->rm_xid; + set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); + + buf = &rqst->rq_rcv_buf; + memset(buf, 0, sizeof(*buf)); + buf->head[0].iov_base = p; + buf->head[0].iov_len = size; + buf->len = size; + + /* The receive buffer has to be hooked to the rpcrdma_req + * so that it can be reposted after the server is done + * parsing it but just before sending the backward + * direction reply. 
+ */ + req = rpcr_to_rdmar(rqst); +#ifdef RPCRDMA_BACKCHANNEL_DEBUG + pr_info("RPC: %s: attaching rep %p to req %p\n", + __func__, rep, req); +#endif + req->rl_reply = rep; + + /* Defeat the retransmit detection logic in send_request */ + req->rl_connect_cookie = 0; + + /* Queue rqst for ULP's callback service */ + bc_serv = xprt->bc_serv; + spin_lock(&bc_serv->sv_cb_lock); + list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list); + spin_unlock(&bc_serv->sv_cb_lock); + + wake_up(&bc_serv->sv_cb_waitq); + + r_xprt->rx_stats.bcall_count++; + return; + +out_overflow: + pr_warn("RPC/RDMA backchannel overflow\n"); + xprt_disconnect_done(xprt); + /* This receive buffer gets reposted automatically + * when the connection is re-established. + */ + return; + +out_short: + pr_warn("RPC/RDMA short backward direction call\n"); + + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + xprt_disconnect_done(xprt); + else + pr_warn("RPC: %s: reposting rep %p\n", + __func__, rep); +} diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 5318951b3b53..88cf9e7269c2 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); - if (IS_ERR(f->fr_pgl)) + + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); + if (!f->sg) goto out_list_err; + + sg_init_table(f->sg, depth); + return 0; out_mr_err: @@ -163,9 +167,9 @@ out_mr_err: return rc; out_list_err: - rc = PTR_ERR(f->fr_pgl); - dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", - __func__, rc); + rc = -ENOMEM; + dprintk("RPC: %s: sg allocation failure\n", + __func__); ib_dereg_mr(f->fr_mr); return rc; } @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) if (rc) dprintk("RPC: %s: ib_dereg_mr status %i\n", __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + kfree(r->r.frmr.sg); } static int @@ -252,8 +256,11 @@ frwr_sendcompletion(struct ib_wc *wc) /* WARNING: Only wr_id and status are reliable at this point */ r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; - pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n", - __func__, r, ib_wc_status_msg(wc->status), wc->status); + if (wc->status == IB_WC_WR_FLUSH_ERR) + dprintk("RPC: %s: frmr %p flushed\n", __func__, r); + else + pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", + __func__, r, ib_wc_status_msg(wc->status), wc->status); r->r.frmr.fr_state = FRMR_IS_STALE; } @@ -312,13 +319,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_send_wr fastreg_wr, *bad_wr; + struct ib_reg_wr reg_wr; + struct ib_send_wr *bad_wr; + int rc, i, n, dma_nents; u8 key; - int len, pageoff; - int i, rc; - int seg_len; - u64 pa; - int page_no; mw = seg1->rl_mw; seg1->rl_mw = NULL; @@ -331,64 +335,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); frmr = &mw->r.frmr; frmr->fr_state = FRMR_IS_VALID; + mr = frmr->fr_mr; - pageoff = offset_in_page(seg1->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; - for (page_no = i = 0; i < nsegs;) { - rpcrdma_map_one(device, seg, direction); - pa = seg->mr_dma; - for 
(seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - frmr->fr_pgl->page_list[page_no++] = pa; - pa += PAGE_SIZE; - } - len += seg->mr_len; + for (i = 0; i < nsegs;) { + if (seg->mr_page) + sg_set_page(&frmr->sg[i], + seg->mr_page, + seg->mr_len, + offset_in_page(seg->mr_offset)); + else + sg_set_buf(&frmr->sg[i], seg->mr_offset, + seg->mr_len); + ++seg; ++i; + /* Check for holes */ if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", - __func__, mw, i, len); - - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.page_list_len = page_no; - fastreg_wr.wr.fast_reg.length = len; - fastreg_wr.wr.fast_reg.access_flags = writing ? - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : - IB_ACCESS_REMOTE_READ; - mr = frmr->fr_mr; + frmr->sg_nents = i; + + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); + if (!dma_nents) { + pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", + __func__, frmr->sg, frmr->sg_nents); + return -ENOMEM; + } + + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", + __func__, frmr->fr_mr, n, frmr->sg_nents); + rc = n < 0 ? n : -EINVAL; + goto out_senderr; + } + + dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", + __func__, mw, frmr->sg_nents, mr->length); + key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.wr.fast_reg.rkey = mr->rkey; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = (uintptr_t)mw; + reg_wr.wr.num_sge = 0; + reg_wr.wr.send_flags = 0; + reg_wr.mr = mr; + reg_wr.key = mr->rkey; + reg_wr.access = writing ? 
+ IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, ®_wr.wr, &bad_wr); if (rc) goto out_senderr; + seg1->mr_dir = direction; seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = i; - seg1->mr_len = len; - return i; + seg1->mr_base = mr->iova; + seg1->mr_nsegs = frmr->sg_nents; + seg1->mr_len = mr->length; + + return frmr->sg_nents; out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - while (i--) - rpcrdma_unmap_one(device, --seg); + ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction); __frwr_queue_recovery(mw); return rc; } @@ -402,22 +422,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw = seg1->rl_mw; + struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; dprintk("RPC: %s: FRMR %p\n", __func__, mw); seg1->rl_mw = NULL; - mw->r.frmr.fr_state = FRMR_IS_INVALID; + frmr->fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia->ri_device, seg++); + ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index bc8bd6577467..c10d9699441c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -441,6 +441,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) enum rpcrdma_chunktype rtype, wtype; struct rpcrdma_msg *headerp; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) + return rpcrdma_bc_marshal_reply(rqst); +#endif + /* * rpclen gets amount of data in first buffer, which is the * pre-registered buffer. @@ -711,6 +716,37 @@ rpcrdma_connect_worker(struct work_struct *work) spin_unlock_bh(&xprt->transport_lock); } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +/* By convention, backchannel calls arrive via rdma_msg type + * messages, and never populate the chunk lists. This makes + * the RPC/RDMA header small and fixed in size, so it is + * straightforward to check the RPC header's direction field. + */ +static bool +rpcrdma_is_bcall(struct rpcrdma_msg *headerp) +{ + __be32 *p = (__be32 *)headerp; + + if (headerp->rm_type != rdma_msg) + return false; + if (headerp->rm_body.rm_chunks[0] != xdr_zero) + return false; + if (headerp->rm_body.rm_chunks[1] != xdr_zero) + return false; + if (headerp->rm_body.rm_chunks[2] != xdr_zero) + return false; + + /* sanity */ + if (p[7] != headerp->rm_xid) + return false; + /* call direction */ + if (p[8] != cpu_to_be32(RPC_CALL)) + return false; + + return true; +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + /* * This function is called when an async event is posted to * the connection which changes the connection state. 
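For reference, the fixed word layout that rpcrdma_is_bcall() above depends on, with p pointing at the start of the RPC/RDMA header; the offsets follow from the fields rpcrdma_bc_marshal_reply() fills in, with RPCRDMA_HDRLEN_MIN spanning the first seven XDR words:

/*
 *	p[0] rm_xid		p[4] rm_body.rm_chunks[0] == xdr_zero
 *	p[1] rm_vers		p[5] rm_body.rm_chunks[1] == xdr_zero
 *	p[2] rm_credit		p[6] rm_body.rm_chunks[2] == xdr_zero
 *	p[3] rm_type		p[7] embedded RPC xid, must repeat rm_xid
 *				p[8] embedded RPC direction, RPC_CALL == 0
 */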
All it @@ -723,8 +759,8 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) schedule_delayed_work(&ep->rep_connect_worker, 0); } -/* - * Called as a tasklet to do req/reply match and complete a request +/* Process received RPC/RDMA messages. + * * Errors must result in the RPC task either being awakened, or * allowed to timeout, to discover the errors at that time. */ @@ -741,52 +777,32 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) unsigned long cwnd; u32 credits; - /* Check status. If bad, signal disconnect and return rep to pool */ - if (rep->rr_len == ~0U) { - rpcrdma_recv_buffer_put(rep); - if (r_xprt->rx_ep.rep_connected == 1) { - r_xprt->rx_ep.rep_connected = -EIO; - rpcrdma_conn_func(&r_xprt->rx_ep); - } - return; - } - if (rep->rr_len < RPCRDMA_HDRLEN_MIN) { - dprintk("RPC: %s: short/invalid reply\n", __func__); - goto repost; - } + dprintk("RPC: %s: incoming rep %p\n", __func__, rep); + + if (rep->rr_len == RPCRDMA_BAD_LEN) + goto out_badstatus; + if (rep->rr_len < RPCRDMA_HDRLEN_MIN) + goto out_shortreply; + headerp = rdmab_to_msg(rep->rr_rdmabuf); - if (headerp->rm_vers != rpcrdma_version) { - dprintk("RPC: %s: invalid version %d\n", - __func__, be32_to_cpu(headerp->rm_vers)); - goto repost; - } + if (headerp->rm_vers != rpcrdma_version) + goto out_badversion; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (rpcrdma_is_bcall(headerp)) + goto out_bcall; +#endif - /* Get XID and try for a match. */ - spin_lock(&xprt->transport_lock); + /* Match incoming rpcrdma_rep to an rpcrdma_req to + * get context for handling any incoming chunks. + */ + spin_lock_bh(&xprt->transport_lock); rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); - if (rqst == NULL) { - spin_unlock(&xprt->transport_lock); - dprintk("RPC: %s: reply 0x%p failed " - "to match any request xid 0x%08x len %d\n", - __func__, rep, be32_to_cpu(headerp->rm_xid), - rep->rr_len); -repost: - r_xprt->rx_stats.bad_reply_count++; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) - rpcrdma_recv_buffer_put(rep); + if (!rqst) + goto out_nomatch; - return; - } - - /* get request object */ req = rpcr_to_rdmar(rqst); - if (req->rl_reply) { - spin_unlock(&xprt->transport_lock); - dprintk("RPC: %s: duplicate reply 0x%p to RPC " - "request 0x%p: xid 0x%08x\n", __func__, rep, req, - be32_to_cpu(headerp->rm_xid)); - goto repost; - } + if (req->rl_reply) + goto out_duplicate; dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", @@ -883,8 +899,50 @@ badheader: if (xprt->cwnd > cwnd) xprt_release_rqst_cong(rqst->rq_task); + xprt_complete_rqst(rqst->rq_task, status); + spin_unlock_bh(&xprt->transport_lock); dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", __func__, xprt, rqst, status); - xprt_complete_rqst(rqst->rq_task, status); - spin_unlock(&xprt->transport_lock); + return; + +out_badstatus: + rpcrdma_recv_buffer_put(rep); + if (r_xprt->rx_ep.rep_connected == 1) { + r_xprt->rx_ep.rep_connected = -EIO; + rpcrdma_conn_func(&r_xprt->rx_ep); + } + return; + +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +out_bcall: + rpcrdma_bc_receive_call(r_xprt, rep); + return; +#endif + +out_shortreply: + dprintk("RPC: %s: short/invalid reply\n", __func__); + goto repost; + +out_badversion: + dprintk("RPC: %s: invalid version %d\n", + __func__, be32_to_cpu(headerp->rm_vers)); + goto repost; + +out_nomatch: + spin_unlock_bh(&xprt->transport_lock); + dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n", + __func__, be32_to_cpu(headerp->rm_xid), + rep->rr_len); + goto repost; + +out_duplicate: + 
spin_unlock_bh(&xprt->transport_lock); + dprintk("RPC: %s: " + "duplicate reply %p to RPC request %p: xid 0x%08x\n", + __func__, rep, req, be32_to_cpu(headerp->rm_xid)); + +repost: + r_xprt->rx_stats.bad_reply_count++; + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) + rpcrdma_recv_buffer_put(rep); } diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 2cd252f023a5..1b7051bdbdc8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -239,6 +239,9 @@ void svc_rdma_cleanup(void) unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_unreg_xprt_class(&svc_rdma_bc_class); +#endif svc_unreg_xprt_class(&svc_rdma_class); kmem_cache_destroy(svc_rdma_map_cachep); kmem_cache_destroy(svc_rdma_ctxt_cachep); @@ -286,6 +289,9 @@ int svc_rdma_init(void) /* Register RDMA with the SVC transport switch */ svc_reg_xprt_class(&svc_rdma_class); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_reg_xprt_class(&svc_rdma_bc_class); +#endif return 0; err1: kmem_cache_destroy(svc_rdma_map_cachep); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index f0c3ff67ca98..ff4f01e527ec 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); int ret, read, pno; @@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); memset(&read_wr, 0, sizeof(read_wr)); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = pages_needed; - - ret = svc_rdma_send(xprt, &read_wr); + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.wr.opcode; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_send_wr fastreg_wr; + struct ib_reg_wr reg_wr; u8 key; - int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); - int ret, read, pno; + int ret, read, pno, dma_nents, n; u32 pg_off = *page_offset; u32 pg_no = *page_no; @@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; - pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset, - rs_length); + nents = min_t(unsigned int, nents, 
xprt->sc_frmr_pg_list_len); + read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length); - frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = pages_needed << PAGE_SHIFT; - frmr->page_list_len = pages_needed; + frmr->sg_nents = nents; - for (pno = 0; pno < pages_needed; pno++) { + for (pno = 0; pno < nents; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; @@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, head->arg.len += len; if (!pg_off) head->count++; + + sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], + len, pg_off); + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; - frmr->page_list->page_list[pno] = - ib_dma_map_page(xprt->sc_cm_id->device, - head->arg.pages[pg_no], 0, - PAGE_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[pno]); - if (ret) - goto err; - atomic_inc(&xprt->sc_dma_used); /* adjust offset and wrap to next page if needed */ pg_off += len; @@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, + frmr->direction); + if (!dma_nents) { + pr_err("svcrdma: failed to dma map sg %p\n", + frmr->sg); + return -ENOMEM; + } + atomic_inc(&xprt->sc_dma_used); + + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", + frmr->mr, n, frmr->sg_nents); + return n < 0 ? 
n : -EINVAL; + } + /* Bump the key */ key = (u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); - ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].addr = frmr->mr->iova; ctxt->sge[0].lkey = frmr->mr->lkey; - ctxt->sge[0].length = read; + ctxt->sge[0].length = frmr->mr->length; ctxt->count = 1; ctxt->read_hdr = head; - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - fastreg_wr.next = &read_wr; + /* Prepare REG WR */ + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = 0; + reg_wr.wr.send_flags = IB_SEND_SIGNALED; + reg_wr.wr.num_sge = 0; + reg_wr.mr = frmr->mr; + reg_wr.key = frmr->mr->lkey; + reg_wr.access = frmr->access_flags; + reg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = 1; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; - read_wr.wr_id = (unsigned long)ctxt; - read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { - read_wr.opcode = IB_WR_RDMA_READ; - read_wr.next = &inv_wr; + read_wr.wr.opcode = IB_WR_RDMA_READ; + read_wr.wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; @@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } - ctxt->wr_op = read_wr.opcode; + ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr); + ret = svc_rdma_send(xprt, &reg_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - svc_rdma_unmap_dma(ctxt); + ib_dma_unmap_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1dfae8317065..969a1ab75fc3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, u32 xdr_off, int write_len, struct svc_rdma_req_map *vec) { - struct ib_send_wr write_wr; + struct ib_rdma_wr write_wr; struct ib_sge *sge; int xdr_sge_no; int sge_no; @@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Prepare WRITE WR */
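/* (Editorial aside, not part of the upstream patch: this series wraps
 * the generic struct ib_send_wr inside per-opcode WR types, which is
 * why generic fields below are reached through write_wr.wr while
 * remote_addr and rkey live in the outer structure. A minimal sketch
 * of the layout, with field names taken from the usage in this diff:
 *
 *	struct ib_rdma_wr {
 *		struct ib_send_wr wr;	generic send WR fields
 *		u64 remote_addr;	RDMA target address
 *		u32 rkey;		remote memory key
 *	};
 *
 * struct ib_reg_wr in the frmr read path above follows the same
 * pattern, with mr, key, and access as its opcode-specific fields.)
 */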
memset(&write_wr, 0, sizeof write_wr); ctxt->wr_op = IB_WR_RDMA_WRITE; - write_wr.wr_id = (unsigned long)ctxt; - write_wr.sg_list = &sge[0]; - write_wr.num_sge = sge_no; - write_wr.opcode = IB_WR_RDMA_WRITE; - write_wr.send_flags = IB_SEND_SIGNALED; - write_wr.wr.rdma.rkey = rmr; - write_wr.wr.rdma.remote_addr = to; + write_wr.wr.wr_id = (unsigned long)ctxt; + write_wr.wr.sg_list = &sge[0]; + write_wr.wr.num_sge = sge_no; + write_wr.wr.opcode = IB_WR_RDMA_WRITE; + write_wr.wr.send_flags = IB_SEND_SIGNALED; + write_wr.rkey = rmr; + write_wr.remote_addr = to; /* Post It */ atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr)) + if (svc_rdma_send(xprt, &write_wr.wr)) goto err; return write_len - bc; err: diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fcc3eb80c265..b348b4adef29 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -56,6 +56,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = { .xcl_ident = XPRT_TRANSPORT_RDMA, }; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, + struct sockaddr *, int, int); +static void svc_rdma_bc_detach(struct svc_xprt *); +static void svc_rdma_bc_free(struct svc_xprt *); + +static struct svc_xprt_ops svc_rdma_bc_ops = { + .xpo_create = svc_rdma_bc_create, + .xpo_detach = svc_rdma_bc_detach, + .xpo_free = svc_rdma_bc_free, + .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, + .xpo_secure_port = svc_rdma_secure_port, +}; + +struct svc_xprt_class svc_rdma_bc_class = { + .xcl_name = "rdma-bc", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_rdma_bc_ops, + .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN) +}; + +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, + struct net *net, + struct sockaddr *sa, int salen, + int flags) +{ + struct svcxprt_rdma *cma_xprt; + struct svc_xprt *xprt; + + cma_xprt = rdma_create_xprt(serv, 0); + if (!cma_xprt) + return ERR_PTR(-ENOMEM); + xprt = &cma_xprt->sc_xprt; + + svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + serv->sv_bc_xprt = xprt; + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + return xprt; +} + +static void svc_rdma_bc_detach(struct svc_xprt *xprt) +{ + dprintk("svcrdma: %s(%p)\n", __func__, xprt); +} + +static void svc_rdma_bc_free(struct svc_xprt *xprt) +{ + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + if (xprt) + kfree(rdma); +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; @@ -692,8 +750,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (!cma_xprt) return ERR_PTR(-ENOMEM); - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, - IB_QPT_RC); + listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); @@ -732,7 +790,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) { struct ib_mr *mr; - struct ib_fast_reg_page_list *pl; + 
struct scatterlist *sg; struct svc_rdma_fastreg_mr *frmr; u32 num_sg; @@ -745,13 +803,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (IS_ERR(mr)) goto err_free_frmr; - pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - num_sg); - if (IS_ERR(pl)) + sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL); + if (!sg) goto err_free_mr; + sg_init_table(sg, RPCSVC_MAXPAGES); + frmr->mr = mr; - frmr->page_list = pl; + frmr->sg = sg; INIT_LIST_HEAD(&frmr->frmr_list); return frmr; @@ -771,8 +830,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) frmr = list_entry(xprt->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); + kfree(frmr->sg); ib_dereg_mr(frmr->mr); - ib_free_fast_reg_page_list(frmr->page_list); kfree(frmr); } } @@ -786,8 +845,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); - frmr->map_len = 0; - frmr->page_list_len = 0; + frmr->sg_nents = 0; } spin_unlock_bh(&rdma->sc_frmr_q_lock); if (frmr) @@ -796,25 +854,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) return rdma_alloc_frmr(rdma); } -static void frmr_unmap_dma(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - int page_no; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - dma_addr_t addr = frmr->page_list->page_list[page_no]; - if (ib_dma_mapping_error(frmr->mr->device, addr)) - continue; - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); - } -} - void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, struct svc_rdma_fastreg_mr *frmr) { if (frmr) { - frmr_unmap_dma(rdma, frmr); + ib_dma_unmap_sg(rdma->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); + atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 41e452bc580c..8c545f7d7525 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -676,7 +676,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static int xprt_rdma_enable_swap(struct rpc_xprt *xprt) { - return -EINVAL; + return 0; } static void @@ -705,7 +705,13 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .print_stats = xprt_rdma_print_stats, .enable_swap = xprt_rdma_enable_swap, .disable_swap = xprt_rdma_disable_swap, - .inject_disconnect = xprt_rdma_inject_disconnect + .inject_disconnect = xprt_rdma_inject_disconnect, +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + .bc_setup = xprt_rdma_bc_setup, + .bc_up = xprt_rdma_bc_up, + .bc_free_rqst = xprt_rdma_bc_free_rqst, + .bc_destroy = xprt_rdma_bc_destroy, +#endif }; static struct xprt_class xprt_rdma = { @@ -732,6 +738,7 @@ void xprt_rdma_cleanup(void) dprintk("RPC: %s: xprt_unregister returned %i\n", __func__, rc); + rpcrdma_destroy_wq(); frwr_destroy_recovery_wq(); } @@ -743,8 +750,15 @@ int xprt_rdma_init(void) if (rc) return rc; + rc = rpcrdma_alloc_wq(); + if (rc) { + frwr_destroy_recovery_wq(); + return rc; + } + rc = xprt_register_transport(&xprt_rdma); if (rc) { + rpcrdma_destroy_wq(); frwr_destroy_recovery_wq(); return rc; } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 5502d4dade74..eadd1655145a 100644 --- 
a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -68,47 +68,33 @@ * internal functions */ -/* - * handle replies in tasklet context, using a single, global list - * rdma tasklet function -- just turn around and call the func - * for all replies on the list - */ - -static DEFINE_SPINLOCK(rpcrdma_tk_lock_g); -static LIST_HEAD(rpcrdma_tasklets_g); +static struct workqueue_struct *rpcrdma_receive_wq; -static void -rpcrdma_run_tasklet(unsigned long data) +int +rpcrdma_alloc_wq(void) { - struct rpcrdma_rep *rep; - unsigned long flags; - - data = data; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - while (!list_empty(&rpcrdma_tasklets_g)) { - rep = list_entry(rpcrdma_tasklets_g.next, - struct rpcrdma_rep, rr_list); - list_del(&rep->rr_list); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + struct workqueue_struct *recv_wq; - rpcrdma_reply_handler(rep); + recv_wq = alloc_workqueue("xprtrdma_receive", + WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI, + 0); + if (!recv_wq) + return -ENOMEM; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - } - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); + rpcrdma_receive_wq = recv_wq; + return 0; } -static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); - -static void -rpcrdma_schedule_tasklet(struct list_head *sched_list) +void +rpcrdma_destroy_wq(void) { - unsigned long flags; + struct workqueue_struct *wq; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_splice_tail(sched_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); + if (rpcrdma_receive_wq) { + wq = rpcrdma_receive_wq; + rpcrdma_receive_wq = NULL; + destroy_workqueue(wq); + } } static void @@ -158,63 +144,54 @@ rpcrdma_sendcq_process_wc(struct ib_wc *wc) } } -static int -rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) +/* The common case is a single send completion is waiting. By + * passing two WC entries to ib_poll_cq, a return code of 1 + * means there is exactly one WC waiting and no more. We don't + * have to invoke ib_poll_cq again to know that the CQ has been + * properly drained. + */ +static void +rpcrdma_sendcq_poll(struct ib_cq *cq) { - struct ib_wc *wcs; - int budget, count, rc; + struct ib_wc *pos, wcs[2]; + int count, rc; - budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; do { - wcs = ep->rep_send_wcs; + pos = wcs; - rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); - if (rc <= 0) - return rc; + rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos); + if (rc < 0) + break; count = rc; while (count-- > 0) - rpcrdma_sendcq_process_wc(wcs++); - } while (rc == RPCRDMA_POLLSIZE && --budget); - return 0; + rpcrdma_sendcq_process_wc(pos++); + } while (rc == ARRAY_SIZE(wcs)); + return; } -/* - * Handle send, fast_reg_mr, and local_inv completions. - * - * Send events are typically suppressed and thus do not result - * in an upcall. Occasionally one is signaled, however. This - * prevents the provider's completion queue from wrapping and - * losing a completion. +/* Handle provider send completion upcalls. 
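+ * + * (Editorial note, not part of the upstream patch: the rewritten upcall below polls the CQ, then re-arms it with IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS; a positive return from ib_req_notify_cq() means completions may have arrived while the CQ was unarmed, so the loop polls again rather than losing them.)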
*/ static void rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context) { - struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; - int rc; - - rc = rpcrdma_sendcq_poll(cq, ep); - if (rc) { - dprintk("RPC: %s: ib_poll_cq failed: %i\n", - __func__, rc); - return; - } + do { + rpcrdma_sendcq_poll(cq); + } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS) > 0); +} - rc = ib_req_notify_cq(cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - if (rc == 0) - return; - if (rc < 0) { - dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", - __func__, rc); - return; - } +static void +rpcrdma_receive_worker(struct work_struct *work) +{ + struct rpcrdma_rep *rep = + container_of(work, struct rpcrdma_rep, rr_work); - rpcrdma_sendcq_poll(cq, ep); + rpcrdma_reply_handler(rep); } static void -rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) +rpcrdma_recvcq_process_wc(struct ib_wc *wc) { struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; @@ -237,91 +214,60 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: - list_add_tail(&rep->rr_list, sched_list); + queue_work(rpcrdma_receive_wq, &rep->rr_work); return; + out_fail: if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("RPC: %s: rep %p: %s\n", __func__, rep, ib_wc_status_msg(wc->status)); - rep->rr_len = ~0U; + rep->rr_len = RPCRDMA_BAD_LEN; goto out_schedule; } -static int -rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep) +/* The wc array is on stack: automatic memory is always CPU-local. + * + * struct ib_wc is 64 bytes, making the poll array potentially + * large. But this is at the bottom of the call chain. Further + * substantial work is done in another thread. + */ +static void +rpcrdma_recvcq_poll(struct ib_cq *cq) { - struct list_head sched_list; - struct ib_wc *wcs; - int budget, count, rc; + struct ib_wc *pos, wcs[4]; + int count, rc; - INIT_LIST_HEAD(&sched_list); - budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE; do { - wcs = ep->rep_recv_wcs; + pos = wcs; - rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs); - if (rc <= 0) - goto out_schedule; + rc = ib_poll_cq(cq, ARRAY_SIZE(wcs), pos); + if (rc < 0) + break; count = rc; while (count-- > 0) - rpcrdma_recvcq_process_wc(wcs++, &sched_list); - } while (rc == RPCRDMA_POLLSIZE && --budget); - rc = 0; - -out_schedule: - rpcrdma_schedule_tasklet(&sched_list); - return rc; + rpcrdma_recvcq_process_wc(pos++); + } while (rc == ARRAY_SIZE(wcs)); } -/* - * Handle receive completions. - * - * It is reentrant but processes single events in order to maintain - * ordering of receives to keep server credits. - * - * It is the responsibility of the scheduled tasklet to return - * recv buffers to the pool. NOTE: this affects synchronization of - * connection shutdown. That is, the structures required for - * the completion of the reply handler must remain intact until - * all memory has been reclaimed. +/* Handle provider receive completion upcalls. 
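+ * + * (Editorial note, not part of the upstream patch: unlike the send side, each receive completion is handed to the rpcrdma_receive_wq workqueue via queue_work(), replacing the old global tasklet; rpcrdma_reply_handler() therefore now runs in process context, which is why the reply path above switched to spin_lock_bh on the transport lock.)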
*/ static void rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context) { - struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context; - int rc; - - rc = rpcrdma_recvcq_poll(cq, ep); - if (rc) { - dprintk("RPC: %s: ib_poll_cq failed: %i\n", - __func__, rc); - return; - } - - rc = ib_req_notify_cq(cq, - IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); - if (rc == 0) - return; - if (rc < 0) { - dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", - __func__, rc); - return; - } - - rpcrdma_recvcq_poll(cq, ep); + do { + rpcrdma_recvcq_poll(cq); + } while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP | + IB_CQ_REPORT_MISSED_EVENTS) > 0); } static void rpcrdma_flush_cqs(struct rpcrdma_ep *ep) { struct ib_wc wc; - LIST_HEAD(sched_list); while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0) - rpcrdma_recvcq_process_wc(&wc, &sched_list); - if (!list_empty(&sched_list)) - rpcrdma_schedule_tasklet(&sched_list); + rpcrdma_recvcq_process_wc(&wc); while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0) rpcrdma_sendcq_process_wc(&wc); } @@ -432,7 +378,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); + id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", @@ -622,6 +569,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; struct ib_cq_init_attr cq_attr = {}; + unsigned int max_qp_wr; int rc, err; if (devattr->max_sge < RPCRDMA_MAX_IOVS) { @@ -630,18 +578,27 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, return -ENOMEM; } + if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) { + dprintk("RPC: %s: insufficient wqe's available\n", + __func__); + return -ENOMEM; + } + max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS; + /* check provider's send/recv wr limits */ - if (cdata->max_requests > devattr->max_qp_wr) - cdata->max_requests = devattr->max_qp_wr; + if (cdata->max_requests > max_qp_wr) + cdata->max_requests = max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; + ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; rc = ia->ri_ops->ro_open(ia, ep, cdata); if (rc) return rc; ep->rep_attr.cap.max_recv_wr = cdata->max_requests; + ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_IOVS; ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; @@ -669,7 +626,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, cq_attr.cqe = ep->rep_attr.cap.max_send_wr + 1; sendcq = ib_create_cq(ia->ri_device, rpcrdma_sendcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -686,7 +643,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, cq_attr.cqe = ep->rep_attr.cap.max_recv_wr + 1; recvcq = ib_create_cq(ia->ri_device, rpcrdma_recvcq_upcall, - rpcrdma_cq_async_error_upcall, ep, &cq_attr); + rpcrdma_cq_async_error_upcall, NULL, &cq_attr); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", @@ -885,7 +842,21 @@ retry: } rc = ep->rep_connected; } else { + struct rpcrdma_xprt *r_xprt; + unsigned int 
extras; + dprintk("RPC: %s: connected\n", __func__); + + r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); + extras = r_xprt->rx_buf.rb_bc_srv_max_requests; + + if (extras) { + rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); + if (rc) + pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", + __func__, rc); + rc = 0; + } } out: @@ -922,20 +893,25 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) } } -static struct rpcrdma_req * +struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; struct rpcrdma_req *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&req->rl_free); + spin_lock(&buffer->rb_reqslock); + list_add(&req->rl_all, &buffer->rb_allreqs); + spin_unlock(&buffer->rb_reqslock); req->rl_buffer = &r_xprt->rx_buf; return req; } -static struct rpcrdma_rep * +struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; @@ -957,6 +933,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep->rr_device = ia->ri_device; rep->rr_rxprt = r_xprt; + INIT_WORK(&rep->rr_work, rpcrdma_receive_worker); return rep; out_free: @@ -970,44 +947,21 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - char *p; - size_t len; int i, rc; - buf->rb_max_requests = cdata->max_requests; + buf->rb_max_requests = r_xprt->rx_data.max_requests; + buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_lock); - /* Need to allocate: - * 1. arrays for send and recv pointers - * 2. arrays of struct rpcrdma_req to fill in pointers - * 3. 
array of struct rpcrdma_rep for replies - * Send/recv buffers in req/rep need to be registered - */ - len = buf->rb_max_requests * - (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); - - p = kzalloc(len, GFP_KERNEL); - if (p == NULL) { - dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", - __func__, len); - rc = -ENOMEM; - goto out; - } - buf->rb_pool = p; /* for freeing it later */ - - buf->rb_send_bufs = (struct rpcrdma_req **) p; - p = (char *) &buf->rb_send_bufs[buf->rb_max_requests]; - buf->rb_recv_bufs = (struct rpcrdma_rep **) p; - p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; - rc = ia->ri_ops->ro_init(r_xprt); if (rc) goto out; + INIT_LIST_HEAD(&buf->rb_send_bufs); + INIT_LIST_HEAD(&buf->rb_allreqs); + spin_lock_init(&buf->rb_reqslock); for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; - struct rpcrdma_rep *rep; req = rpcrdma_create_req(r_xprt); if (IS_ERR(req)) { @@ -1016,7 +970,13 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) rc = PTR_ERR(req); goto out; } - buf->rb_send_bufs[i] = req; + req->rl_backchannel = false; + list_add(&req->rl_free, &buf->rb_send_bufs); + } + + INIT_LIST_HEAD(&buf->rb_recv_bufs); + for (i = 0; i < buf->rb_max_requests + 2; i++) { + struct rpcrdma_rep *rep; rep = rpcrdma_create_rep(r_xprt); if (IS_ERR(rep)) { @@ -1025,7 +985,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) rc = PTR_ERR(rep); goto out; } - buf->rb_recv_bufs[i] = rep; + list_add(&rep->rr_list, &buf->rb_recv_bufs); } return 0; @@ -1034,22 +994,38 @@ out: return rc; } +static struct rpcrdma_req * +rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_req *req; + + req = list_first_entry(&buf->rb_send_bufs, + struct rpcrdma_req, rl_free); + list_del(&req->rl_free); + return req; +} + +static struct rpcrdma_rep * +rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_rep *rep; + + rep = list_first_entry(&buf->rb_recv_bufs, + struct rpcrdma_rep, rr_list); + list_del(&rep->rr_list); + return rep; +} + static void rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) { - if (!rep) - return; - rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); kfree(rep); } -static void +void rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) { - if (!req) - return; - rpcrdma_free_regbuf(ia, req->rl_sendbuf); rpcrdma_free_regbuf(ia, req->rl_rdmabuf); kfree(req); @@ -1059,25 +1035,29 @@ void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { struct rpcrdma_ia *ia = rdmab_to_ia(buf); - int i; - /* clean up in reverse order from create - * 1. recv mr memory (mr free, then kfree) - * 2. send mr memory (mr free, then kfree) - * 3. 
MWs - */ - dprintk("RPC: %s: entering\n", __func__); + while (!list_empty(&buf->rb_recv_bufs)) { + struct rpcrdma_rep *rep; - for (i = 0; i < buf->rb_max_requests; i++) { - if (buf->rb_recv_bufs) - rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]); - if (buf->rb_send_bufs) - rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); + rep = rpcrdma_buffer_get_rep_locked(buf); + rpcrdma_destroy_rep(ia, rep); } - ia->ri_ops->ro_destroy(buf); + spin_lock(&buf->rb_reqslock); + while (!list_empty(&buf->rb_allreqs)) { + struct rpcrdma_req *req; + + req = list_first_entry(&buf->rb_allreqs, + struct rpcrdma_req, rl_all); + list_del(&req->rl_all); + + spin_unlock(&buf->rb_reqslock); + rpcrdma_destroy_req(ia, req); + spin_lock(&buf->rb_reqslock); + } + spin_unlock(&buf->rb_reqslock); - kfree(buf->rb_pool); + ia->ri_ops->ro_destroy(buf); } struct rpcrdma_mw * @@ -1109,53 +1089,34 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) spin_unlock(&buf->rb_mwlock); } -static void -rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) -{ - buf->rb_send_bufs[--buf->rb_send_index] = req; - req->rl_niovs = 0; - if (req->rl_reply) { - buf->rb_recv_bufs[--buf->rb_recv_index] = req->rl_reply; - req->rl_reply = NULL; - } -} - /* * Get a set of request/reply buffers. * - * Reply buffer (if needed) is attached to send buffer upon return. - * Rule: - * rb_send_index and rb_recv_index MUST always be pointing to the - * *next* available buffer (non-NULL). They are incremented after - * removing buffers, and decremented *before* returning them. + * Reply buffer (if available) is attached to send buffer upon return. */ struct rpcrdma_req * rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { struct rpcrdma_req *req; - unsigned long flags; - - spin_lock_irqsave(&buffers->rb_lock, flags); - if (buffers->rb_send_index == buffers->rb_max_requests) { - spin_unlock_irqrestore(&buffers->rb_lock, flags); - dprintk("RPC: %s: out of request buffers\n", __func__); - return ((struct rpcrdma_req *)NULL); - } - - req = buffers->rb_send_bufs[buffers->rb_send_index]; - if (buffers->rb_send_index < buffers->rb_recv_index) { - dprintk("RPC: %s: %d extra receives outstanding (ok)\n", - __func__, - buffers->rb_recv_index - buffers->rb_send_index); - req->rl_reply = NULL; - } else { - req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; - buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; - } - buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; + spin_lock(&buffers->rb_lock); + if (list_empty(&buffers->rb_send_bufs)) + goto out_reqbuf; + req = rpcrdma_buffer_get_req_locked(buffers); + if (list_empty(&buffers->rb_recv_bufs)) + goto out_repbuf; + req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + spin_unlock(&buffers->rb_lock); + return req; - spin_unlock_irqrestore(&buffers->rb_lock, flags); +out_reqbuf: + spin_unlock(&buffers->rb_lock); + pr_warn("RPC: %s: out of request buffers\n", __func__); + return NULL; +out_repbuf: + spin_unlock(&buffers->rb_lock); + pr_warn("RPC: %s: out of reply buffers\n", __func__); + req->rl_reply = NULL; return req; } @@ -1167,30 +1128,31 @@ void rpcrdma_buffer_put(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - unsigned long flags; + struct rpcrdma_rep *rep = req->rl_reply; - spin_lock_irqsave(&buffers->rb_lock, flags); - rpcrdma_buffer_put_sendbuf(req, buffers); - spin_unlock_irqrestore(&buffers->rb_lock, flags); + req->rl_niovs = 0; + req->rl_reply = NULL; + + spin_lock(&buffers->rb_lock); + list_add_tail(&req->rl_free, 
&buffers->rb_send_bufs); + if (rep) + list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock(&buffers->rb_lock); } /* * Recover reply buffers from pool. - * This happens when recovering from error conditions. - * Post-increment counter/array index. + * This happens when recovering from disconnect. */ void rpcrdma_recv_buffer_get(struct rpcrdma_req *req) { struct rpcrdma_buffer *buffers = req->rl_buffer; - unsigned long flags; - spin_lock_irqsave(&buffers->rb_lock, flags); - if (buffers->rb_recv_index < buffers->rb_max_requests) { - req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; - buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; - } - spin_unlock_irqrestore(&buffers->rb_lock, flags); + spin_lock(&buffers->rb_lock); + if (!list_empty(&buffers->rb_recv_bufs)) + req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers); + spin_unlock(&buffers->rb_lock); } /* @@ -1201,11 +1163,10 @@ void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) { struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf; - unsigned long flags; - spin_lock_irqsave(&buffers->rb_lock, flags); - buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; - spin_unlock_irqrestore(&buffers->rb_lock, flags); + spin_lock(&buffers->rb_lock); + list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs); + spin_unlock(&buffers->rb_lock); } /* @@ -1362,6 +1323,47 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, return rc; } +/** + * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests + * @r_xprt: transport associated with these backchannel resources + * @count: minimum number of incoming requests expected + * + * Returns zero if all requested buffers were posted, or a negative errno. + */ +int +rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) +{ + struct rpcrdma_buffer *buffers = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_rep *rep; + unsigned long flags; + int rc; + + while (count--) { + spin_lock_irqsave(&buffers->rb_lock, flags); + if (list_empty(&buffers->rb_recv_bufs)) + goto out_reqbuf; + rep = rpcrdma_buffer_get_rep_locked(buffers); + spin_unlock_irqrestore(&buffers->rb_lock, flags); + + rc = rpcrdma_ep_post_recv(ia, ep, rep); + if (rc) + goto out_rc; + } + + return 0; + +out_reqbuf: + spin_unlock_irqrestore(&buffers->rb_lock, flags); + pr_warn("%s: no extra receive buffers\n", __func__); + return -ENOMEM; + +out_rc: + rpcrdma_recv_buffer_put(rep); + return rc; +} + /* How many chunk list items fit within our inline buffers? */ unsigned int diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c09414e6f91b..ac7f8d4f632a 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -77,9 +77,6 @@ struct rpcrdma_ia { * RDMA Endpoint -- one per transport instance */ -#define RPCRDMA_WC_BUDGET (128) -#define RPCRDMA_POLLSIZE (16) - struct rpcrdma_ep { atomic_t rep_cqcount; int rep_cqinit; @@ -89,8 +86,6 @@ struct rpcrdma_ep { struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; - struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE]; - struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE]; }; /* @@ -106,6 +101,16 @@ struct rpcrdma_ep { */ #define RPCRDMA_IGNORE_COMPLETION (0ULL) +/* Pre-allocate extra Work Requests for handling backward receives + * and sends. This is a fixed value because the Work Queues are + * allocated when the forward channel is set up.
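+ * + * (Editorial example, not part of the upstream patch: with RPCRDMA_BACKWARD_WRS defined as 8 below, rpcrdma_ep_create() in verbs.c first subtracts 8 from devattr->max_qp_wr before clamping cdata->max_requests, then adds 8 to both cap.max_send_wr and cap.max_recv_wr; a device advertising max_qp_wr of 4096 thus serves at most 4088 forward requests plus the 8 reserved backchannel WRs.)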
+ */ +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +#define RPCRDMA_BACKWARD_WRS (8) +#else +#define RPCRDMA_BACKWARD_WRS (0) +#endif + /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV * * The below structure appears at the front of a large region of kmalloc'd @@ -169,10 +174,13 @@ struct rpcrdma_rep { unsigned int rr_len; struct ib_device *rr_device; struct rpcrdma_xprt *rr_rxprt; + struct work_struct rr_work; struct list_head rr_list; struct rpcrdma_regbuf *rr_rdmabuf; }; +#define RPCRDMA_BAD_LEN (~0U) + /* * struct rpcrdma_mw - external memory region metadata * @@ -193,7 +201,8 @@ enum rpcrdma_frmr_state { }; struct rpcrdma_frmr { - struct ib_fast_reg_page_list *fr_pgl; + struct scatterlist *sg; + int sg_nents; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; struct work_struct fr_work; @@ -255,6 +264,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ #define RPCRDMA_MAX_IOVS (2) struct rpcrdma_req { + struct list_head rl_free; unsigned int rl_niovs; unsigned int rl_nchunks; unsigned int rl_connect_cookie; @@ -264,6 +274,9 @@ struct rpcrdma_req { struct rpcrdma_regbuf *rl_rdmabuf; struct rpcrdma_regbuf *rl_sendbuf; struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; + + struct list_head rl_all; + bool rl_backchannel; }; static inline struct rpcrdma_req * @@ -288,12 +301,14 @@ struct rpcrdma_buffer { struct list_head rb_all; char *rb_pool; - spinlock_t rb_lock; /* protect buf arrays */ + spinlock_t rb_lock; /* protect buf lists */ + struct list_head rb_send_bufs; + struct list_head rb_recv_bufs; u32 rb_max_requests; - int rb_send_index; - int rb_recv_index; - struct rpcrdma_req **rb_send_bufs; - struct rpcrdma_rep **rb_recv_bufs; + + u32 rb_bc_srv_max_requests; + spinlock_t rb_reqslock; /* protect rb_allreqs */ + struct list_head rb_allreqs; }; #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) @@ -339,6 +354,7 @@ struct rpcrdma_stats { unsigned long failed_marshal_count; unsigned long bad_reply_count; unsigned long nomsg_call_count; + unsigned long bcall_count; }; /* @@ -414,6 +430,9 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, /* * Buffer calls - xprtrdma/verbs.c */ +struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); +struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); +void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); @@ -430,10 +449,14 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *); +int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int); int frwr_alloc_recovery_wq(void); void frwr_destroy_recovery_wq(void); +int rpcrdma_alloc_wq(void); +void rpcrdma_destroy_wq(void); + /* * Wrappers for chunk registration, shared by read/write chunk code. 
*/ @@ -494,6 +517,18 @@ int rpcrdma_marshal_req(struct rpc_rqst *); int xprt_rdma_init(void); void xprt_rdma_cleanup(void); +/* Backchannel calls - xprtrdma/backchannel.c + */ +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); +int xprt_rdma_bc_up(struct svc_serv *, struct net *); +int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); +void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); +int rpcrdma_bc_marshal_reply(struct rpc_rqst *); +void xprt_rdma_bc_free_rqst(struct rpc_rqst *); +void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + /* Temporary NFS request map cache. Created in svc_rdma.c */ extern struct kmem_cache *svc_rdma_map_cachep; /* WR context cache. Created in svc_rdma.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1a85e0ed0b48..1d1a70498910 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -360,8 +360,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i int flags = XS_SENDMSG_FLAGS; remainder -= len; - if (remainder != 0 || more) + if (more) flags |= MSG_MORE; + if (remainder != 0) + flags |= MSG_SENDPAGE_NOTLAST | MSG_MORE; err = do_sendpage(sock, *ppage, base, len, flags); if (remainder == 0 || err != len) break; @@ -823,6 +825,7 @@ static void xs_reset_transport(struct sock_xprt *transport) kernel_sock_shutdown(sock, SHUT_RDWR); + mutex_lock(&transport->recv_mutex); write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; @@ -833,6 +836,7 @@ static void xs_reset_transport(struct sock_xprt *transport) xprt_clear_connected(xprt); write_unlock_bh(&sk->sk_callback_lock); xs_sock_reset_connection_flags(xprt); + mutex_unlock(&transport->recv_mutex); trace_rpc_socket_close(xprt, sock); sock_release(sock); @@ -886,6 +890,7 @@ static void xs_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&transport->connect_worker); xs_close(xprt); + cancel_work_sync(&transport->recv_worker); xs_xprt_free(xprt); module_put(THIS_MODULE); } @@ -906,44 +911,36 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) } /** - * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets - * @sk: socket with data to read + * xs_local_data_read_skb + * @xprt: transport + * @sk: socket + * @skb: skbuff * * Currently this assumes we can read the whole reply in a single gulp. */ -static void xs_local_data_ready(struct sock *sk) +static void xs_local_data_read_skb(struct rpc_xprt *xprt, + struct sock *sk, + struct sk_buff *skb) { struct rpc_task *task; - struct rpc_xprt *xprt; struct rpc_rqst *rovr; - struct sk_buff *skb; - int err, repsize, copied; + int repsize, copied; u32 _xid; __be32 *xp; - read_lock_bh(&sk->sk_callback_lock); - dprintk("RPC: %s...\n", __func__); - xprt = xprt_from_sock(sk); - if (xprt == NULL) - goto out; - - skb = skb_recv_datagram(sk, 0, 1, &err); - if (skb == NULL) - goto out; - repsize = skb->len - sizeof(rpc_fraghdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d\n", repsize); - goto dropit; + return; } /* Copy the XID from the skb... 
*/ xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); if (xp == NULL) - goto dropit; + return; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; @@ -961,50 +958,68 @@ static void xs_local_data_ready(struct sock *sk) xprt_complete_rqst(task, copied); out_unlock: - spin_unlock(&xprt->transport_lock); - dropit: - skb_free_datagram(sk, skb); - out: - read_unlock_bh(&sk->sk_callback_lock); + spin_unlock_bh(&xprt->transport_lock); +} + +static void xs_local_data_receive(struct sock_xprt *transport) +{ + struct sk_buff *skb; + struct sock *sk; + int err; + + mutex_lock(&transport->recv_mutex); + sk = transport->inet; + if (sk == NULL) + goto out; + for (;;) { + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + break; + xs_local_data_read_skb(&transport->xprt, sk, skb); + skb_free_datagram(sk, skb); + } +out: + mutex_unlock(&transport->recv_mutex); +} + +static void xs_local_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_local_data_receive(transport); } /** - * xs_udp_data_ready - "data ready" callback for UDP sockets - * @sk: socket with data to read + * xs_udp_data_read_skb - receive callback for UDP sockets + * @xprt: transport + * @sk: socket + * @skb: skbuff * */ -static void xs_udp_data_ready(struct sock *sk) +static void xs_udp_data_read_skb(struct rpc_xprt *xprt, + struct sock *sk, + struct sk_buff *skb) { struct rpc_task *task; - struct rpc_xprt *xprt; struct rpc_rqst *rovr; - struct sk_buff *skb; - int err, repsize, copied; + int repsize, copied; u32 _xid; __be32 *xp; - read_lock_bh(&sk->sk_callback_lock); - dprintk("RPC: xs_udp_data_ready...\n"); - if (!(xprt = xprt_from_sock(sk))) - goto out; - - if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) - goto out; - repsize = skb->len - sizeof(struct udphdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d!\n", repsize); - goto dropit; + return; } /* Copy the XID from the skb... 
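 * (Editorial note, not part of the upstream patch: skb_header_pointer() returns a pointer into the skb when the requested bytes are in the linear area, otherwise it copies them into the on-stack _xid buffer and returns that; a NULL return means the datagram is too short, and the reply is silently dropped.)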
*/ xp = skb_header_pointer(skb, sizeof(struct udphdr), sizeof(_xid), &_xid); if (xp == NULL) - goto dropit; + return; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; @@ -1025,10 +1040,54 @@ static void xs_udp_data_ready(struct sock *sk) xprt_complete_rqst(task, copied); out_unlock: - spin_unlock(&xprt->transport_lock); - dropit: - skb_free_datagram(sk, skb); - out: + spin_unlock_bh(&xprt->transport_lock); +} + +static void xs_udp_data_receive(struct sock_xprt *transport) +{ + struct sk_buff *skb; + struct sock *sk; + int err; + + mutex_lock(&transport->recv_mutex); + sk = transport->inet; + if (sk == NULL) + goto out; + for (;;) { + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + break; + xs_udp_data_read_skb(&transport->xprt, sk, skb); + skb_free_datagram(sk, skb); + } +out: + mutex_unlock(&transport->recv_mutex); +} + +static void xs_udp_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_udp_data_receive(transport); +} + +/** + * xs_data_ready - "data ready" callback for UDP sockets + * @sk: socket with data to read + * + */ +static void xs_data_ready(struct sock *sk) +{ + struct rpc_xprt *xprt; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: xs_data_ready...\n"); + xprt = xprt_from_sock(sk); + if (xprt != NULL) { + struct sock_xprt *transport = container_of(xprt, + struct sock_xprt, xprt); + queue_work(rpciod_workqueue, &transport->recv_worker); + } read_unlock_bh(&sk->sk_callback_lock); } @@ -1243,12 +1302,12 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); req = xprt_lookup_rqst(xprt, transport->tcp_xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return -1; } @@ -1257,7 +1316,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1277,10 +1336,10 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, struct rpc_rqst *req; /* Look up and lock the request corresponding to the given XID */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); req = xprt_lookup_bc_request(xprt, transport->tcp_xid); if (req == NULL) { - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); return -1; @@ -1291,7 +1350,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_bc_request(req, transport->tcp_copied); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1306,6 +1365,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, xs_tcp_read_reply(xprt, desc) : xs_tcp_read_callback(xprt, desc); } + +static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); + if 
(ret < 0) + return ret; + return 0; +} #else static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -1391,6 +1461,44 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns return len - desc.count; } +static void xs_tcp_data_receive(struct sock_xprt *transport) +{ + struct rpc_xprt *xprt = &transport->xprt; + struct sock *sk; + read_descriptor_t rd_desc = { + .count = 2*1024*1024, + .arg.data = xprt, + }; + unsigned long total = 0; + int read = 0; + + mutex_lock(&transport->recv_mutex); + sk = transport->inet; + if (sk == NULL) + goto out; + + /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ + for (;;) { + lock_sock(sk); + read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + release_sock(sk); + if (read <= 0) + break; + total += read; + rd_desc.count = 65536; + } +out: + mutex_unlock(&transport->recv_mutex); + trace_xs_tcp_data_ready(xprt, read, total); +} + +static void xs_tcp_data_receive_workfn(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, recv_worker); + xs_tcp_data_receive(transport); +} + /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read @@ -1398,34 +1506,24 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns */ static void xs_tcp_data_ready(struct sock *sk) { + struct sock_xprt *transport; struct rpc_xprt *xprt; - read_descriptor_t rd_desc; - int read; - unsigned long total = 0; dprintk("RPC: xs_tcp_data_ready...\n"); read_lock_bh(&sk->sk_callback_lock); - if (!(xprt = xprt_from_sock(sk))) { - read = 0; + if (!(xprt = xprt_from_sock(sk))) goto out; - } + transport = container_of(xprt, struct sock_xprt, xprt); + /* Any data means we had a useful conversation, so * we don't need to delay the next reconnect */ if (xprt->reestablish_timeout) xprt->reestablish_timeout = 0; + queue_work(rpciod_workqueue, &transport->recv_worker); - /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ - rd_desc.arg.data = xprt; - do { - rd_desc.count = 65536; - read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); - if (read > 0) - total += read; - } while (read > 0); out: - trace_xs_tcp_data_ready(xprt, read, total); read_unlock_bh(&sk->sk_callback_lock); } @@ -1873,7 +1971,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, xs_save_old_callbacks(transport, sk); sk->sk_user_data = xprt; - sk->sk_data_ready = xs_local_data_ready; + sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_NOIO; @@ -2059,7 +2157,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_save_old_callbacks(transport, sk); sk->sk_user_data = xprt; - sk->sk_data_ready = xs_udp_data_ready; + sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_allocation = GFP_NOIO; @@ -2472,7 +2570,7 @@ static int bc_send_request(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct svc_xprt *xprt; - u32 len; + int len; dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); /* @@ -2580,6 +2678,12 @@ static struct rpc_xprt_ops xs_tcp_ops = { .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, .inject_disconnect = xs_inject_disconnect, +#ifdef CONFIG_SUNRPC_BACKCHANNEL + .bc_setup = xprt_setup_bc, + .bc_up = xs_tcp_bc_up, + .bc_free_rqst = xprt_free_bc_rqst, + .bc_destroy = xprt_destroy_bc, +#endif }; /* @@ -2650,6 +2754,7 @@ static
struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, } new = container_of(xprt, struct sock_xprt, xprt); + mutex_init(&new->recv_mutex); memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) @@ -2703,6 +2808,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) xprt->ops = &xs_local_ops; xprt->timeout = &xs_local_default_timeout; + INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket); @@ -2774,21 +2880,20 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) xprt->timeout = &xs_udp_default_timeout; + INIT_WORK(&transport->recv_worker, xs_udp_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_setup_socket); + switch (addr->sa_family) { case AF_INET: if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_setup_socket); xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: @@ -2853,21 +2958,20 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->ops = &xs_tcp_ops; xprt->timeout = &xs_tcp_default_timeout; + INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); + switch (addr->sa_family) { case AF_INET: if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_tcp_setup_socket); xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 77f5d17e2612..f34e535e93bd 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -1,6 +1,6 @@ /* * net/switchdev/switchdev.c - Switch device API - * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us> + * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us> * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> * * This program is free software; you can redistribute it and/or modify @@ -15,12 +15,166 @@ #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/netdevice.h> +#include <linux/etherdevice.h> #include <linux/if_bridge.h> +#include <linux/list.h> +#include <linux/workqueue.h> #include <linux/if_vlan.h> #include <net/ip_fib.h> #include <net/switchdev.h> /** + * switchdev_trans_item_enqueue - Enqueue data item to transaction queue + * + * @trans: transaction + * @data: pointer to data being queued + * @destructor: data destructor + * @tritem: transaction item being queued + * + * Enqueue data item to transaction queue. tritem is typically placed in + * container pointed at by data pointer. Destructor is called on + * transaction abort and after successful commit phase in case + * the caller did not dequeue the item before.
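+ * (Editorial sketch, not part of the upstream patch: a driver would
+ * typically embed the item in its own prepare-phase allocation, e.g.
+ *
+ *	struct foo_vlan {
+ *		struct switchdev_trans_item tritem;
+ *		u16 vid;
+ *	};
+ *
+ *	v = kzalloc(sizeof(*v), GFP_KERNEL);
+ *	v->vid = vid;
+ *	switchdev_trans_item_enqueue(trans, v, kfree, &v->tritem);
+ *
+ * then recover it in the commit phase with
+ * switchdev_trans_item_dequeue(trans). foo_vlan and vid are
+ * illustrative names only.)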
+ */ +void switchdev_trans_item_enqueue(struct switchdev_trans *trans, + void *data, void (*destructor)(void const *), + struct switchdev_trans_item *tritem) +{ + tritem->data = data; + tritem->destructor = destructor; + list_add_tail(&tritem->list, &trans->item_list); +} +EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); + +static struct switchdev_trans_item * +__switchdev_trans_item_dequeue(struct switchdev_trans *trans) +{ + struct switchdev_trans_item *tritem; + + if (list_empty(&trans->item_list)) + return NULL; + tritem = list_first_entry(&trans->item_list, + struct switchdev_trans_item, list); + list_del(&tritem->list); + return tritem; +} + +/** + * switchdev_trans_item_dequeue - Dequeue data item from transaction queue + * + * @trans: transaction + */ +void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) +{ + struct switchdev_trans_item *tritem; + + tritem = __switchdev_trans_item_dequeue(trans); + BUG_ON(!tritem); + return tritem->data; +} +EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); + +static void switchdev_trans_init(struct switchdev_trans *trans) +{ + INIT_LIST_HEAD(&trans->item_list); +} + +static void switchdev_trans_items_destroy(struct switchdev_trans *trans) +{ + struct switchdev_trans_item *tritem; + + while ((tritem = __switchdev_trans_item_dequeue(trans))) + tritem->destructor(tritem->data); +} + +static void switchdev_trans_items_warn_destroy(struct net_device *dev, + struct switchdev_trans *trans) +{ + WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", + dev->name); + switchdev_trans_items_destroy(trans); +} + +static LIST_HEAD(deferred); +static DEFINE_SPINLOCK(deferred_lock); + +typedef void switchdev_deferred_func_t(struct net_device *dev, + const void *data); + +struct switchdev_deferred_item { + struct list_head list; + struct net_device *dev; + switchdev_deferred_func_t *func; + unsigned long data[0]; +}; + +static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) +{ + struct switchdev_deferred_item *dfitem; + + spin_lock_bh(&deferred_lock); + if (list_empty(&deferred)) { + dfitem = NULL; + goto unlock; + } + dfitem = list_first_entry(&deferred, + struct switchdev_deferred_item, list); + list_del(&dfitem->list); +unlock: + spin_unlock_bh(&deferred_lock); + return dfitem; +} + +/** + * switchdev_deferred_process - Process ops in deferred queue + * + * Called to flush the ops currently queued in deferred ops queue. + * rtnl_lock must be held. 
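+ * + * (Editorial note, not part of the upstream patch: items are drained in FIFO order; each stored callback runs with the saved net_device, after which the dev_hold() reference taken in switchdev_deferred_enqueue() is released and the item freed. switchdev_deferred_process_work() below supplies the rtnl-held context when the queue is flushed from the work item.)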
+ */ +void switchdev_deferred_process(void) +{ + struct switchdev_deferred_item *dfitem; + + ASSERT_RTNL(); + + while ((dfitem = switchdev_deferred_dequeue())) { + dfitem->func(dfitem->dev, dfitem->data); + dev_put(dfitem->dev); + kfree(dfitem); + } +} +EXPORT_SYMBOL_GPL(switchdev_deferred_process); + +static void switchdev_deferred_process_work(struct work_struct *work) +{ + rtnl_lock(); + switchdev_deferred_process(); + rtnl_unlock(); +} + +static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work); + +static int switchdev_deferred_enqueue(struct net_device *dev, + const void *data, size_t data_len, + switchdev_deferred_func_t *func) +{ + struct switchdev_deferred_item *dfitem; + + dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC); + if (!dfitem) + return -ENOMEM; + dfitem->dev = dev; + dfitem->func = func; + memcpy(dfitem->data, data, data_len); + dev_hold(dev); + spin_lock_bh(&deferred_lock); + list_add_tail(&dfitem->list, &deferred); + spin_unlock_bh(&deferred_lock); + schedule_work(&deferred_process_work); + return 0; +} + +/** * switchdev_port_attr_get - Get port attribute * * @dev: port device @@ -32,7 +186,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) struct net_device *lower_dev; struct list_head *iter; struct switchdev_attr first = { - .id = SWITCHDEV_ATTR_UNDEFINED + .id = SWITCHDEV_ATTR_ID_UNDEFINED }; int err = -EOPNOTSUPP; @@ -52,7 +206,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) err = switchdev_port_attr_get(lower_dev, attr); if (err) break; - if (first.id == SWITCHDEV_ATTR_UNDEFINED) + if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED) first = *attr; else if (memcmp(&first, attr, sizeof(*attr))) return -ENODATA; @@ -63,18 +217,21 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) EXPORT_SYMBOL_GPL(switchdev_port_attr_get); static int __switchdev_port_attr_set(struct net_device *dev, - struct switchdev_attr *attr) + const struct switchdev_attr *attr, + struct switchdev_trans *trans) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; - if (ops && ops->switchdev_port_attr_set) - return ops->switchdev_port_attr_set(dev, attr); + if (ops && ops->switchdev_port_attr_set) { + err = ops->switchdev_port_attr_set(dev, attr, trans); + goto done; + } if (attr->flags & SWITCHDEV_F_NO_RECURSE) - return err; + goto done; /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to set attr on @@ -82,80 +239,25 @@ static int __switchdev_port_attr_set(struct net_device *dev, */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = __switchdev_port_attr_set(lower_dev, attr); + err = __switchdev_port_attr_set(lower_dev, attr, trans); if (err) break; } - return err; -} - -struct switchdev_attr_set_work { - struct work_struct work; - struct net_device *dev; - struct switchdev_attr attr; -}; - -static void switchdev_port_attr_set_work(struct work_struct *work) -{ - struct switchdev_attr_set_work *asw = - container_of(work, struct switchdev_attr_set_work, work); - int err; - - rtnl_lock(); - err = switchdev_port_attr_set(asw->dev, &asw->attr); - if (err && err != -EOPNOTSUPP) - netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n", - err, asw->attr.id); - rtnl_unlock(); - - dev_put(asw->dev); - kfree(work); -} +done: + if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) + err = 0; -static int 
switchdev_port_attr_set_defer(struct net_device *dev, - struct switchdev_attr *attr) -{ - struct switchdev_attr_set_work *asw; - - asw = kmalloc(sizeof(*asw), GFP_ATOMIC); - if (!asw) - return -ENOMEM; - - INIT_WORK(&asw->work, switchdev_port_attr_set_work); - - dev_hold(dev); - asw->dev = dev; - memcpy(&asw->attr, attr, sizeof(asw->attr)); - - schedule_work(&asw->work); - - return 0; + return err; } -/** - * switchdev_port_attr_set - Set port attribute - * - * @dev: port device - * @attr: attribute to set - * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - */ -int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) +static int switchdev_port_attr_set_now(struct net_device *dev, + const struct switchdev_attr *attr) { + struct switchdev_trans trans; int err; - if (!rtnl_is_locked()) { - /* Running prepare-commit transaction across stacked - * devices requires nothing moves, so if rtnl_lock is - * not held, schedule a worker thread to hold rtnl_lock - * while setting attr. - */ - - return switchdev_port_attr_set_defer(dev, attr); - } + switchdev_trans_init(&trans); /* Phase I: prepare for attr set. Driver/device should fail * here if there are going to be issues in the commit phase, @@ -164,18 +266,16 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * but should not commit the attr. */ - attr->trans = SWITCHDEV_TRANS_PREPARE; - err = __switchdev_port_attr_set(dev, attr); + trans.ph_prepare = true; + err = __switchdev_port_attr_set(dev, attr, &trans); if (err) { /* Prepare phase failed: abort the transaction. Any * resources reserved in the prepare phase are * released. */ - if (err != -EOPNOTSUPP) { - attr->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_attr_set(dev, attr); - } + if (err != -EOPNOTSUPP) + switchdev_trans_items_destroy(&trans); return err; } @@ -185,17 +285,75 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr) * because the driver said everythings was OK in phase I. */ - attr->trans = SWITCHDEV_TRANS_COMMIT; - err = __switchdev_port_attr_set(dev, attr); + trans.ph_prepare = false; + err = __switchdev_port_attr_set(dev, attr, &trans); WARN(err, "%s: Commit of attribute (id=%d) failed.\n", dev->name, attr->id); + switchdev_trans_items_warn_destroy(dev, &trans); return err; } + +static void switchdev_port_attr_set_deferred(struct net_device *dev, + const void *data) +{ + const struct switchdev_attr *attr = data; + int err; + + err = switchdev_port_attr_set_now(dev, attr); + if (err && err != -EOPNOTSUPP) + netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", + err, attr->id); +} + +static int switchdev_port_attr_set_defer(struct net_device *dev, + const struct switchdev_attr *attr) +{ + return switchdev_deferred_enqueue(dev, attr, sizeof(*attr), + switchdev_port_attr_set_deferred); +} + +/** + * switchdev_port_attr_set - Set port attribute + * + * @dev: port device + * @attr: attribute to set + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held and must not be in atomic section, + * in case SWITCHDEV_F_DEFER flag is not set. 
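
Driver side of the new signature, reduced to a sketch (foo_* and the check/apply helpers are hypothetical; the brport_flags union member is assumed from this series): the same callback now runs once per phase and must only touch hardware in commit.

static int foo_attr_set(struct net_device *dev,
			const struct switchdev_attr *attr,
			struct switchdev_trans *trans)
{
	switch (attr->id) {
	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
		if (trans->ph_prepare)
			/* validate and reserve only; no device writes */
			return foo_brport_flags_check(dev,
						      attr->u.brport_flags);
		/* commit must not fail; prepare vouched for it */
		foo_brport_flags_apply(dev, attr->u.brport_flags);
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
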
+ */ +int switchdev_port_attr_set(struct net_device *dev, + const struct switchdev_attr *attr) +{ + if (attr->flags & SWITCHDEV_F_DEFER) + return switchdev_port_attr_set_defer(dev, attr); + ASSERT_RTNL(); + return switchdev_port_attr_set_now(dev, attr); +} EXPORT_SYMBOL_GPL(switchdev_port_attr_set); +static size_t switchdev_obj_size(const struct switchdev_obj *obj) +{ + switch (obj->id) { + case SWITCHDEV_OBJ_ID_PORT_VLAN: + return sizeof(struct switchdev_obj_port_vlan); + case SWITCHDEV_OBJ_ID_IPV4_FIB: + return sizeof(struct switchdev_obj_ipv4_fib); + case SWITCHDEV_OBJ_ID_PORT_FDB: + return sizeof(struct switchdev_obj_port_fdb); + default: + BUG(); + } + return 0; +} + static int __switchdev_port_obj_add(struct net_device *dev, - struct switchdev_obj *obj) + const struct switchdev_obj *obj, + struct switchdev_trans *trans) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -203,7 +361,7 @@ static int __switchdev_port_obj_add(struct net_device *dev, int err = -EOPNOTSUPP; if (ops && ops->switchdev_port_obj_add) - return ops->switchdev_port_obj_add(dev, obj); + return ops->switchdev_port_obj_add(dev, obj, trans); /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to add object on @@ -211,7 +369,7 @@ static int __switchdev_port_obj_add(struct net_device *dev, */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = __switchdev_port_obj_add(lower_dev, obj); + err = __switchdev_port_obj_add(lower_dev, obj, trans); if (err) break; } @@ -219,24 +377,16 @@ static int __switchdev_port_obj_add(struct net_device *dev, return err; } -/** - * switchdev_port_obj_add - Add port object - * - * @dev: port device - * @obj: object to add - * - * Use a 2-phase prepare-commit transaction model to ensure - * system is not left in a partially updated state due to - * failure from driver/device. - * - * rtnl_lock must be held. - */ -int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) +static int switchdev_port_obj_add_now(struct net_device *dev, + const struct switchdev_obj *obj) { + struct switchdev_trans trans; int err; ASSERT_RTNL(); + switchdev_trans_init(&trans); + /* Phase I: prepare for obj add. Driver/device should fail * here if there are going to be issues in the commit phase, * such as lack of resources or support. The driver/device @@ -244,18 +394,16 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * but should not commit the obj. */ - obj->trans = SWITCHDEV_TRANS_PREPARE; - err = __switchdev_port_obj_add(dev, obj); + trans.ph_prepare = true; + err = __switchdev_port_obj_add(dev, obj, &trans); if (err) { /* Prepare phase failed: abort the transaction. Any * resources reserved in the prepare phase are * released. */ - if (err != -EOPNOTSUPP) { - obj->trans = SWITCHDEV_TRANS_ABORT; - __switchdev_port_obj_add(dev, obj); - } + if (err != -EOPNOTSUPP) + switchdev_trans_items_destroy(&trans); return err; } @@ -265,21 +413,59 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj) * because the driver said everythings was OK in phase I. 
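
Because deferred ops memcpy the whole typed object, sized by switchdev_obj_size() above, callers now pass the embedded obj of a stack container. A sketch in the same shape as the fdb helpers later in this patch (the wrapper name is hypothetical):

static int foo_vlan_obj_add(struct net_device *dev, u16 vid, u16 flags,
			    bool defer)
{
	struct switchdev_obj_port_vlan vlan = {
		.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
		.flags = flags,
		.vid_begin = vid,
		.vid_end = vid,
	};

	if (defer)
		/* the core copies the object, so a stack container is fine */
		vlan.obj.flags |= SWITCHDEV_F_DEFER;
	return switchdev_port_obj_add(dev, &vlan.obj);
}
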
*/ - obj->trans = SWITCHDEV_TRANS_COMMIT; - err = __switchdev_port_obj_add(dev, obj); + trans.ph_prepare = false; + err = __switchdev_port_obj_add(dev, obj, &trans); WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); + switchdev_trans_items_warn_destroy(dev, &trans); return err; } -EXPORT_SYMBOL_GPL(switchdev_port_obj_add); + +static void switchdev_port_obj_add_deferred(struct net_device *dev, + const void *data) +{ + const struct switchdev_obj *obj = data; + int err; + + err = switchdev_port_obj_add_now(dev, obj); + if (err && err != -EOPNOTSUPP) + netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", + err, obj->id); +} + +static int switchdev_port_obj_add_defer(struct net_device *dev, + const struct switchdev_obj *obj) +{ + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), + switchdev_port_obj_add_deferred); +} /** - * switchdev_port_obj_del - Delete port object + * switchdev_port_obj_add - Add port object * * @dev: port device - * @obj: object to delete + * @id: object ID + * @obj: object to add + * + * Use a 2-phase prepare-commit transaction model to ensure + * system is not left in a partially updated state due to + * failure from driver/device. + * + * rtnl_lock must be held and must not be in atomic section, + * in case SWITCHDEV_F_DEFER flag is not set. */ -int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) +int switchdev_port_obj_add(struct net_device *dev, + const struct switchdev_obj *obj) +{ + if (obj->flags & SWITCHDEV_F_DEFER) + return switchdev_port_obj_add_defer(dev, obj); + ASSERT_RTNL(); + return switchdev_port_obj_add_now(dev, obj); +} +EXPORT_SYMBOL_GPL(switchdev_port_obj_add); + +static int switchdev_port_obj_del_now(struct net_device *dev, + const struct switchdev_obj *obj) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; @@ -295,30 +481,75 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj) */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_del(lower_dev, obj); + err = switchdev_port_obj_del_now(lower_dev, obj); if (err) break; } return err; } + +static void switchdev_port_obj_del_deferred(struct net_device *dev, + const void *data) +{ + const struct switchdev_obj *obj = data; + int err; + + err = switchdev_port_obj_del_now(dev, obj); + if (err && err != -EOPNOTSUPP) + netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", + err, obj->id); +} + +static int switchdev_port_obj_del_defer(struct net_device *dev, + const struct switchdev_obj *obj) +{ + return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), + switchdev_port_obj_del_deferred); +} + +/** + * switchdev_port_obj_del - Delete port object + * + * @dev: port device + * @id: object ID + * @obj: object to delete + * + * rtnl_lock must be held and must not be in atomic section, + * in case SWITCHDEV_F_DEFER flag is not set. + */ +int switchdev_port_obj_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + if (obj->flags & SWITCHDEV_F_DEFER) + return switchdev_port_obj_del_defer(dev, obj); + ASSERT_RTNL(); + return switchdev_port_obj_del_now(dev, obj); +} EXPORT_SYMBOL_GPL(switchdev_port_obj_del); /** * switchdev_port_obj_dump - Dump port objects * * @dev: port device + * @id: object ID * @obj: object to dump + * @cb: function to call with a filled object + * + * rtnl_lock must be held. 
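
Dump callers now recover their context from the filled object with container_of() instead of carrying it in the obj itself. A counting sketch (foo_* hypothetical), mirroring the fdb dump further down:

struct foo_vlan_walk {
	struct switchdev_obj_port_vlan vlan;	/* filled by the driver */
	int count;				/* caller context */
};

static int foo_vlan_walk_cb(struct switchdev_obj *obj)
{
	struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
	struct foo_vlan_walk *walk =
		container_of(vlan, struct foo_vlan_walk, vlan);

	walk->count += vlan->vid_end - vlan->vid_begin + 1;
	return 0;
}

static int foo_count_vlans(struct net_device *dev)
{
	struct foo_vlan_walk walk = {
		.vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
	};
	int err;

	/* caller holds rtnl_lock, per the new kernel-doc above */
	err = switchdev_port_obj_dump(dev, &walk.vlan.obj, foo_vlan_walk_cb);
	return err ? err : walk.count;
}
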
*/ -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) +int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, + switchdev_obj_dump_cb_t *cb) { const struct switchdev_ops *ops = dev->switchdev_ops; struct net_device *lower_dev; struct list_head *iter; int err = -EOPNOTSUPP; + ASSERT_RTNL(); + if (ops && ops->switchdev_port_obj_dump) - return ops->switchdev_port_obj_dump(dev, obj); + return ops->switchdev_port_obj_dump(dev, obj, cb); /* Switch device port(s) may be stacked under * bond/team/vlan dev, so recurse down to dump objects on @@ -326,7 +557,7 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj) */ netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_dump(lower_dev, obj); + err = switchdev_port_obj_dump(lower_dev, obj, cb); break; } @@ -398,7 +629,7 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, EXPORT_SYMBOL_GPL(call_switchdev_notifiers); struct switchdev_vlan_dump { - struct switchdev_obj obj; + struct switchdev_obj_port_vlan vlan; struct sk_buff *skb; u32 filter_mask; u16 flags; @@ -406,8 +637,7 @@ struct switchdev_vlan_dump { u16 end; }; -static int switchdev_port_vlan_dump_put(struct net_device *dev, - struct switchdev_vlan_dump *dump) +static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump) { struct bridge_vlan_info vinfo; @@ -437,12 +667,11 @@ static int switchdev_port_vlan_dump_put(struct net_device *dev, return 0; } -static int switchdev_port_vlan_dump_cb(struct net_device *dev, - struct switchdev_obj *obj) +static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj) { + struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); struct switchdev_vlan_dump *dump = - container_of(obj, struct switchdev_vlan_dump, obj); - struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan; + container_of(vlan, struct switchdev_vlan_dump, vlan); int err = 0; if (vlan->vid_begin > vlan->vid_end) @@ -453,7 +682,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev, for (dump->begin = dump->end = vlan->vid_begin; dump->begin <= vlan->vid_end; dump->begin++, dump->end++) { - err = switchdev_port_vlan_dump_put(dev, dump); + err = switchdev_port_vlan_dump_put(dump); if (err) return err; } @@ -465,7 +694,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev, /* prepend */ dump->begin = vlan->vid_begin; } else { - err = switchdev_port_vlan_dump_put(dev, dump); + err = switchdev_port_vlan_dump_put(dump); dump->flags = vlan->flags; dump->begin = vlan->vid_begin; dump->end = vlan->vid_end; @@ -477,7 +706,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev, /* append */ dump->end = vlan->vid_end; } else { - err = switchdev_port_vlan_dump_put(dev, dump); + err = switchdev_port_vlan_dump_put(dump); dump->flags = vlan->flags; dump->begin = vlan->vid_begin; dump->end = vlan->vid_end; @@ -494,10 +723,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, u32 filter_mask) { struct switchdev_vlan_dump dump = { - .obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, - .cb = switchdev_port_vlan_dump_cb, - }, + .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, .skb = skb, .filter_mask = filter_mask, }; @@ -505,12 +731,13 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, if ((filter_mask & RTEXT_FILTER_BRVLAN) || (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { - err = switchdev_port_obj_dump(dev, &dump.obj); + err = switchdev_port_obj_dump(dev, &dump.vlan.obj, + 
switchdev_port_vlan_dump_cb); if (err) goto err_out; if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) /* last one */ - err = switchdev_port_vlan_dump_put(dev, &dump); + err = switchdev_port_vlan_dump_put(&dump); } err_out: @@ -530,10 +757,10 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, int nlflags) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC; + u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; int err; err = switchdev_port_attr_get(dev, &attr); @@ -551,7 +778,7 @@ static int switchdev_port_br_setflag(struct net_device *dev, unsigned long brport_flag) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS, + .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, }; u8 flag = nla_get_u8(nlattr); int err; @@ -604,6 +831,9 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, err = switchdev_port_br_setflag(dev, attr, BR_LEARNING_SYNC); break; + case IFLA_BRPORT_UNICAST_FLOOD: + err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); + break; default: err = -EOPNOTSUPP; break; @@ -618,14 +848,13 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev, static int switchdev_port_br_afspec(struct net_device *dev, struct nlattr *afspec, int (*f)(struct net_device *dev, - struct switchdev_obj *obj)) + const struct switchdev_obj *obj)) { struct nlattr *attr; struct bridge_vlan_info *vinfo; - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_VLAN, + struct switchdev_obj_port_vlan vlan = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, }; - struct switchdev_obj_vlan *vlan = &obj.u.vlan; int rem; int err; @@ -637,30 +866,33 @@ static int switchdev_port_br_afspec(struct net_device *dev, vinfo = nla_data(attr); if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) return -EINVAL; - vlan->flags = vinfo->flags; + vlan.flags = vinfo->flags; if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vlan->vid_begin) + if (vlan.vid_begin) + return -EINVAL; + vlan.vid_begin = vinfo->vid; + /* don't allow range of pvids */ + if (vlan.flags & BRIDGE_VLAN_INFO_PVID) return -EINVAL; - vlan->vid_begin = vinfo->vid; } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!vlan->vid_begin) + if (!vlan.vid_begin) return -EINVAL; - vlan->vid_end = vinfo->vid; - if (vlan->vid_end <= vlan->vid_begin) + vlan.vid_end = vinfo->vid; + if (vlan.vid_end <= vlan.vid_begin) return -EINVAL; - err = f(dev, &obj); + err = f(dev, &vlan.obj); if (err) return err; - memset(vlan, 0, sizeof(*vlan)); + vlan.vid_begin = 0; } else { - if (vlan->vid_begin) + if (vlan.vid_begin) return -EINVAL; - vlan->vid_begin = vinfo->vid; - vlan->vid_end = vinfo->vid; - err = f(dev, &obj); + vlan.vid_begin = vinfo->vid; + vlan.vid_end = vinfo->vid; + err = f(dev, &vlan.obj); if (err) return err; - memset(vlan, 0, sizeof(*vlan)); + vlan.vid_begin = 0; } } @@ -742,15 +974,13 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags) { - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .u.fdb = { - .addr = addr, - .vid = vid, - }, + struct switchdev_obj_port_fdb fdb = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, + .vid = vid, }; - return switchdev_port_obj_add(dev, &obj); + ether_addr_copy(fdb.addr, addr); + return switchdev_port_obj_add(dev, &fdb.obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); @@ -769,30 +999,29 @@ int 
switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { - struct switchdev_obj obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .u.fdb = { - .addr = addr, - .vid = vid, - }, + struct switchdev_obj_port_fdb fdb = { + .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, + .vid = vid, }; - return switchdev_port_obj_del(dev, &obj); + ether_addr_copy(fdb.addr, addr); + return switchdev_port_obj_del(dev, &fdb.obj); } EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); struct switchdev_fdb_dump { - struct switchdev_obj obj; + struct switchdev_obj_port_fdb fdb; + struct net_device *dev; struct sk_buff *skb; struct netlink_callback *cb; int idx; }; -static int switchdev_port_fdb_dump_cb(struct net_device *dev, - struct switchdev_obj *obj) +static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) { + struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj); struct switchdev_fdb_dump *dump = - container_of(obj, struct switchdev_fdb_dump, obj); + container_of(fdb, struct switchdev_fdb_dump, fdb); u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; @@ -812,13 +1041,13 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev, ndm->ndm_pad2 = 0; ndm->ndm_flags = NTF_SELF; ndm->ndm_type = 0; - ndm->ndm_ifindex = dev->ifindex; - ndm->ndm_state = obj->u.fdb.ndm_state; + ndm->ndm_ifindex = dump->dev->ifindex; + ndm->ndm_state = fdb->ndm_state; - if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr)) + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr)) goto nla_put_failure; - if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid)) + if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid)) goto nla_put_failure; nlmsg_end(dump->skb, nlh); @@ -848,16 +1077,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *filter_dev, int idx) { struct switchdev_fdb_dump dump = { - .obj = { - .id = SWITCHDEV_OBJ_PORT_FDB, - .cb = switchdev_port_fdb_dump_cb, - }, + .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, + .dev = dev, .skb = skb, .cb = cb, .idx = idx, }; - switchdev_port_obj_dump(dev, &dump.obj); + switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); return dump.idx; } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); @@ -888,12 +1115,14 @@ static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) { struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, }; struct switchdev_attr prev_attr; struct net_device *dev = NULL; int nhsel; + ASSERT_RTNL(); + /* For this route, all nexthop devs must be on the same switch. 
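
The nexthop walk that follows hinges on SWITCHDEV_ATTR_ID_PORT_PARENT_ID answering "same physical switch?". Reduced to a sketch (helper name hypothetical; netdev_phys_item_id_same() assumed available in this kernel):

static bool foo_same_switch(struct net_device *a, struct net_device *b)
{
	struct switchdev_attr a_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
	};
	struct switchdev_attr b_attr = {
		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
	};

	if (switchdev_port_attr_get(a, &a_attr) ||
	    switchdev_port_attr_get(b, &b_attr))
		return false;	/* no parent ID: treat as different switches */

	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}
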
*/ for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { @@ -935,21 +1164,20 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 nlflags, u32 tb_id) { - struct switchdev_obj fib_obj = { - .id = SWITCHDEV_OBJ_IPV4_FIB, - .u.ipv4_fib = { - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }, + struct switchdev_obj_ipv4_fib ipv4_fib = { + .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, + .dst = dst, + .dst_len = dst_len, + .tos = tos, + .type = type, + .nlflags = nlflags, + .tb_id = tb_id, }; struct net_device *dev; int err = 0; + memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); + /* Don't offload route if using custom ip rules or if * IPv4 FIB offloading has been disabled completely. */ @@ -966,7 +1194,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; - err = switchdev_port_obj_add(dev, &fib_obj); + err = switchdev_port_obj_add(dev, &ipv4_fib.obj); if (!err) fi->fib_flags |= RTNH_F_OFFLOAD; @@ -989,21 +1217,20 @@ EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, u8 tos, u8 type, u32 tb_id) { - struct switchdev_obj fib_obj = { - .id = SWITCHDEV_OBJ_IPV4_FIB, - .u.ipv4_fib = { - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }, + struct switchdev_obj_ipv4_fib ipv4_fib = { + .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, + .dst = dst, + .dst_len = dst_len, + .tos = tos, + .type = type, + .nlflags = 0, + .tb_id = tb_id, }; struct net_device *dev; int err = 0; + memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); + if (!(fi->fib_flags & RTNH_F_OFFLOAD)) return 0; @@ -1011,7 +1238,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, if (!dev) return 0; - err = switchdev_port_obj_del(dev, &fib_obj); + err = switchdev_port_obj_del(dev, &ipv4_fib.obj); if (!err) fi->fib_flags &= ~RTNH_F_OFFLOAD; @@ -1043,11 +1270,11 @@ static bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct switchdev_attr a_attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { - .id = SWITCHDEV_ATTR_PORT_PARENT_ID, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, .flags = SWITCHDEV_F_NO_RECURSE, }; @@ -1126,10 +1353,11 @@ void switchdev_port_fwd_mark_set(struct net_device *dev, u32 mark = dev->ifindex; u32 reset_mark = 0; - if (group_dev && joining) { - mark = switchdev_port_fwd_mark_get(dev, group_dev); - } else if (group_dev && !joining) { - if (dev->offload_fwd_mark == mark) + if (group_dev) { + ASSERT_RTNL(); + if (joining) + mark = switchdev_port_fwd_mark_get(dev, group_dev); + else if (dev->offload_fwd_mark == mark) /* Ohoh, this port was the mark reference port, * but it's leaving the group, so reset the * mark for the remaining ports in the group. diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index eadba62afa85..9dc239dfe192 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -35,742 +35,301 @@ * POSSIBILITY OF SUCH DAMAGE. 
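
For the forwarding-mark hunk above: the mark ties all ports of one offloaded group together so software does not re-forward frames the switch ASIC already flooded. A conceptual sketch only (the rx helper is hypothetical; the field names match this kernel):

/* driver rx path: stamp frames with the ingress port's group mark */
static void foo_rx_stamp(struct sk_buff *skb, struct net_device *in_dev)
{
	skb->offload_fwd_mark = in_dev->offload_fwd_mark;
}

/* The core transmit path can then compare skb->offload_fwd_mark
 * against the egress dev->offload_fwd_mark and consume the skb as a
 * duplicate, since the hardware already delivered it to that port. */
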
*/ +#include <linux/tipc_config.h> #include "socket.h" #include "msg.h" #include "bcast.h" #include "name_distr.h" -#include "core.h" +#include "link.h" +#include "node.h" -#define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ #define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ #define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff); -static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node); -static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); - -static void tipc_bclink_lock(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - spin_lock_bh(&tn->bclink->lock); -} - -static void tipc_bclink_unlock(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - spin_unlock_bh(&tn->bclink->lock); -} - -void tipc_bclink_input(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_sk_mcast_rcv(net, &tn->bclink->arrvq, &tn->bclink->inputq); -} - -uint tipc_bclink_get_mtu(void) -{ - return MAX_PKT_DEFAULT_MCAST; -} - -static u32 bcbuf_acks(struct sk_buff *buf) -{ - return (u32)(unsigned long)TIPC_SKB_CB(buf)->handle; -} - -static void bcbuf_set_acks(struct sk_buff *buf, u32 acks) -{ - TIPC_SKB_CB(buf)->handle = (void *)(unsigned long)acks; -} - -static void bcbuf_decr_acks(struct sk_buff *buf) -{ - bcbuf_set_acks(buf, bcbuf_acks(buf) - 1); -} +/** + * struct tipc_bc_base - base structure for keeping broadcast send state + * @link: broadcast send link structure + * @inputq: data input queue; will only carry SOCK_WAKEUP messages + * @dest: array keeping number of reachable destinations per bearer + * @primary_bearer: a bearer having links to all broadcast destinations, if any + */ +struct tipc_bc_base { + struct tipc_link *link; + struct sk_buff_head inputq; + int dests[MAX_BEARERS]; + int primary_bearer; +}; -void tipc_bclink_add_node(struct net *net, u32 addr) +static struct tipc_bc_base *tipc_bc_base(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_nmap_add(&tn->bclink->bcast_nodes, addr); - tipc_bclink_unlock(net); + return tipc_net(net)->bcbase; } -void tipc_bclink_remove_node(struct net *net, u32 addr) +int tipc_bcast_get_mtu(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_nmap_remove(&tn->bclink->bcast_nodes, addr); - - /* Last node? 
=> reset backlog queue */ - if (!tn->bclink->bcast_nodes.count) - tipc_link_purge_backlog(&tn->bclink->link); - - tipc_bclink_unlock(net); + return tipc_link_mtu(tipc_bc_sndlink(net)); } -static void bclink_set_last_sent(struct net *net) +/* tipc_bcbase_select_primary(): find a bearer with links to all destinations, + * if any, and make it primary bearer + */ +static void tipc_bcbase_select_primary(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_bc_base *bb = tipc_bc_base(net); + int all_dests = tipc_link_bc_peers(bb->link); + int i, mtu; - bcl->silent_intv_cnt = mod(bcl->snd_nxt - 1); -} + bb->primary_bearer = INVALID_BEARER_ID; -u32 tipc_bclink_get_last_sent(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); + if (!all_dests) + return; - return tn->bcl->silent_intv_cnt; -} + for (i = 0; i < MAX_BEARERS; i++) { + if (!bb->dests[i]) + continue; -static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) -{ - node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? - seqno : node->bclink.last_sent; -} + mtu = tipc_bearer_mtu(net, i); + if (mtu < tipc_link_mtu(bb->link)) + tipc_link_set_mtu(bb->link, mtu); -/** - * tipc_bclink_retransmit_to - get most recent node to request retransmission - * - * Called with bclink_lock locked - */ -struct tipc_node *tipc_bclink_retransmit_to(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - return tn->bclink->retransmit_to; -} + if (bb->dests[i] < all_dests) + continue; -/** - * bclink_retransmit_pkt - retransmit broadcast packets - * @after: sequence number of last packet to *not* retransmit - * @to: sequence number of last packet to retransmit - * - * Called with bclink_lock locked - */ -static void bclink_retransmit_pkt(struct tipc_net *tn, u32 after, u32 to) -{ - struct sk_buff *skb; - struct tipc_link *bcl = tn->bcl; + bb->primary_bearer = i; - skb_queue_walk(&bcl->transmq, skb) { - if (more(buf_seqno(skb), after)) { - tipc_link_retransmit(bcl, skb, mod(to - after)); + /* Reduce risk that all nodes select same primary */ + if ((i ^ tipc_own_addr(net)) & 1) break; - } } } -/** - * bclink_prepare_wakeup - prepare users for wakeup after congestion - * @bcl: broadcast link - * @resultq: queue for users which can be woken up - * Move a number of waiting users, as permitted by available space in - * the send queue, from link wait queue to specified queue for wakeup - */ -static void bclink_prepare_wakeup(struct tipc_link *bcl, struct sk_buff_head *resultq) +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) { - int pnd[TIPC_SYSTEM_IMPORTANCE + 1] = {0,}; - int imp, lim; - struct sk_buff *skb, *tmp; - - skb_queue_walk_safe(&bcl->wakeupq, skb, tmp) { - imp = TIPC_SKB_CB(skb)->chain_imp; - lim = bcl->window + bcl->backlog[imp].limit; - pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; - if ((pnd[imp] + bcl->backlog[imp].len) >= lim) - continue; - skb_unlink(skb, &bcl->wakeupq); - skb_queue_tail(resultq, skb); - } -} + struct tipc_bc_base *bb = tipc_bc_base(net); -/** - * tipc_bclink_wakeup_users - wake up pending users - * - * Called with no locks taken - */ -void tipc_bclink_wakeup_users(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct sk_buff_head resultq; - - skb_queue_head_init(&resultq); - bclink_prepare_wakeup(bcl, &resultq); - tipc_sk_rcv(net, &resultq); + tipc_bcast_lock(net); + bb->dests[bearer_id]++; + 
tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_acknowledge - handle acknowledgement of broadcast packets - * @n_ptr: node that sent acknowledgement info - * @acked: broadcast sequence # that has been acknowledged - * - * Node is locked, bclink_lock unlocked. - */ -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) { - struct sk_buff *skb, *tmp; - unsigned int released = 0; - struct net *net = n_ptr->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); - - if (unlikely(!n_ptr->bclink.recv_permitted)) - return; + struct tipc_bc_base *bb = tipc_bc_base(net); - tipc_bclink_lock(net); - - /* Bail out if tx queue is empty (no clean up is required) */ - skb = skb_peek(&tn->bcl->transmq); - if (!skb) - goto exit; - - /* Determine which messages need to be acknowledged */ - if (acked == INVALID_LINK_SEQ) { - /* - * Contact with specified node has been lost, so need to - * acknowledge sent messages only (if other nodes still exist) - * or both sent and unsent messages (otherwise) - */ - if (tn->bclink->bcast_nodes.count) - acked = tn->bcl->silent_intv_cnt; - else - acked = tn->bcl->snd_nxt; - } else { - /* - * Bail out if specified sequence number does not correspond - * to a message that has been sent and not yet acknowledged - */ - if (less(acked, buf_seqno(skb)) || - less(tn->bcl->silent_intv_cnt, acked) || - less_eq(acked, n_ptr->bclink.acked)) - goto exit; - } - - /* Skip over packets that node has previously acknowledged */ - skb_queue_walk(&tn->bcl->transmq, skb) { - if (more(buf_seqno(skb), n_ptr->bclink.acked)) - break; - } - - /* Update packets that node is now acknowledging */ - skb_queue_walk_from_safe(&tn->bcl->transmq, skb, tmp) { - if (more(buf_seqno(skb), acked)) - break; - bcbuf_decr_acks(skb); - bclink_set_last_sent(net); - if (bcbuf_acks(skb) == 0) { - __skb_unlink(skb, &tn->bcl->transmq); - kfree_skb(skb); - released = 1; - } - } - n_ptr->bclink.acked = acked; - - /* Try resolving broadcast link congestion, if necessary */ - if (unlikely(skb_peek(&tn->bcl->backlogq))) { - tipc_link_push_packets(tn->bcl); - bclink_set_last_sent(net); - } - if (unlikely(released && !skb_queue_empty(&tn->bcl->wakeupq))) - n_ptr->action_flags |= TIPC_WAKEUP_BCAST_USERS; -exit: - tipc_bclink_unlock(net); + tipc_bcast_lock(net); + bb->dests[bearer_id]--; + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); } -/** - * tipc_bclink_update_link_state - update broadcast link state +/* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers * - * RCU and node lock set + * Note that number of reachable destinations, as indicated in the dests[] + * array, may transitionally differ from the number of destinations indicated + * in each sent buffer. We can sustain this. 
Excess destination nodes will + * drop and never acknowledge the unexpected packets, and missing destinations + * will either require retransmission (if they are just about to be added to + * the bearer), or be removed from the buffer's 'ackers' counter (if they + * just went down) */ -void tipc_bclink_update_link_state(struct tipc_node *n_ptr, - u32 last_sent) +static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) { - struct sk_buff *buf; - struct net *net = n_ptr->net; - struct tipc_net *tn = net_generic(net, tipc_net_id); - - /* Ignore "stale" link state info */ - if (less_eq(last_sent, n_ptr->bclink.last_in)) - return; + int bearer_id; + struct tipc_bc_base *bb = tipc_bc_base(net); + struct sk_buff *skb, *_skb; + struct sk_buff_head _xmitq; - /* Update link synchronization state; quit if in sync */ - bclink_update_last_sent(n_ptr, last_sent); - - if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) + if (skb_queue_empty(xmitq)) return; - /* Update out-of-sync state; quit if loss is still unconfirmed */ - if ((++n_ptr->bclink.oos_state) == 1) { - if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2)) - return; - n_ptr->bclink.oos_state++; - } - - /* Don't NACK if one has been recently sent (or seen) */ - if (n_ptr->bclink.oos_state & 0x1) + /* The typical case: at least one bearer has links to all nodes */ + bearer_id = bb->primary_bearer; + if (bearer_id >= 0) { + tipc_bearer_bc_xmit(net, bearer_id, xmitq); return; - - /* Send NACK */ - buf = tipc_buf_acquire(INT_H_SIZE); - if (buf) { - struct tipc_msg *msg = buf_msg(buf); - struct sk_buff *skb = skb_peek(&n_ptr->bclink.deferdq); - u32 to = skb ? buf_seqno(skb) - 1 : n_ptr->bclink.last_sent; - - tipc_msg_init(tn->own_addr, msg, BCAST_PROTOCOL, STATE_MSG, - INT_H_SIZE, n_ptr->addr); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tn->net_id); - msg_set_bcast_ack(msg, n_ptr->bclink.last_in); - msg_set_bcgap_after(msg, n_ptr->bclink.last_in); - msg_set_bcgap_to(msg, to); - - tipc_bclink_lock(net); - tipc_bearer_send(net, MAX_BEARERS, buf, NULL); - tn->bcl->stats.sent_nacks++; - tipc_bclink_unlock(net); - kfree_skb(buf); - - n_ptr->bclink.oos_state++; } -} -void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr) -{ - u16 last = msg_last_bcast(hdr); - int mtyp = msg_type(hdr); + /* We have to transmit across all bearers */ + skb_queue_head_init(&_xmitq); + for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { + if (!bb->dests[bearer_id]) + continue; - if (unlikely(msg_user(hdr) != LINK_PROTOCOL)) - return; - if (mtyp == STATE_MSG) { - tipc_bclink_update_link_state(n, last); - return; + skb_queue_walk(xmitq, skb) { + _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); + if (!_skb) + break; + __skb_queue_tail(&_xmitq, _skb); + } + tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); } - /* Compatibility: older nodes don't know BCAST_PROTOCOL synchronization, - * and transfer synch info in LINK_PROTOCOL messages. - */ - if (tipc_node_is_up(n)) - return; - if ((mtyp != RESET_MSG) && (mtyp != ACTIVATE_MSG)) - return; - n->bclink.last_sent = last; - n->bclink.last_in = last; - n->bclink.oos_state = 0; + __skb_queue_purge(xmitq); + __skb_queue_purge(&_xmitq); } -/** - * bclink_peek_nack - monitor retransmission requests sent by other nodes - * - * Delay any upcoming NACK by this node if another node has already - * requested the first message this node is going to ask for. 
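
One detail in tipc_bcbase_xmit() below worth isolating: the copies use pskb_copy_for_clone(), not skb_clone(), because each bearer may rewrite headers and a clone would share them. The duplication step as a standalone sketch (hypothetical helper; the caller purges the target queue on error):

#include <linux/skbuff.h>

static int foo_dup_queue(struct sk_buff_head *from, struct sk_buff_head *to)
{
	struct sk_buff *skb, *_skb;

	skb_queue_walk(from, skb) {
		/* private copy of the header part; data may stay shared */
		_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
		if (!_skb)
			return -ENOMEM;
		__skb_queue_tail(to, _skb);
	}
	return 0;
}
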
- */ -static void bclink_peek_nack(struct net *net, struct tipc_msg *msg) -{ - struct tipc_node *n_ptr = tipc_node_find(net, msg_destnode(msg)); - - if (unlikely(!n_ptr)) - return; - - tipc_node_lock(n_ptr); - if (n_ptr->bclink.recv_permitted && - (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && - (n_ptr->bclink.last_in == msg_bcgap_after(msg))) - n_ptr->bclink.oos_state = 2; - tipc_node_unlock(n_ptr); - tipc_node_put(n_ptr); -} - -/* tipc_bclink_xmit - deliver buffer chain to all nodes in cluster +/* tipc_bcast_xmit - deliver buffer chain to all nodes in cluster * and to identified node local sockets * @net: the applicable net namespace * @list: chain of buffers containing message * Consumes the buffer chain, except when returning -ELINKCONG * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ -int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list) +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct tipc_bclink *bclink = tn->bclink; + struct tipc_link *l = tipc_bc_sndlink(net); + struct sk_buff_head xmitq, inputq, rcvq; int rc = 0; - int bc = 0; - struct sk_buff *skb; - struct sk_buff_head arrvq; - struct sk_buff_head inputq; - /* Prepare clone of message for local node */ - skb = tipc_msg_reassemble(list); - if (unlikely(!skb)) - return -EHOSTUNREACH; + __skb_queue_head_init(&rcvq); + __skb_queue_head_init(&xmitq); + skb_queue_head_init(&inputq); - /* Broadcast to all nodes */ - if (likely(bclink)) { - tipc_bclink_lock(net); - if (likely(bclink->bcast_nodes.count)) { - rc = __tipc_link_xmit(net, bcl, list); - if (likely(!rc)) { - u32 len = skb_queue_len(&bcl->transmq); - - bclink_set_last_sent(net); - bcl->stats.queue_sz_counts++; - bcl->stats.accu_queue_sz += len; - } - bc = 1; - } - tipc_bclink_unlock(net); - } + /* Prepare message clone for local node */ + if (unlikely(!tipc_msg_reassemble(list, &rcvq))) + return -EHOSTUNREACH; - if (unlikely(!bc)) - __skb_queue_purge(list); + tipc_bcast_lock(net); + if (tipc_link_bc_peers(l)) + rc = tipc_link_xmit(l, list, &xmitq); + tipc_bcast_unlock(net); + /* Don't send to local node if adding to link failed */ if (unlikely(rc)) { - kfree_skb(skb); + __skb_queue_purge(&rcvq); return rc; } - /* Deliver message clone */ - __skb_queue_head_init(&arrvq); - skb_queue_head_init(&inputq); - __skb_queue_tail(&arrvq, skb); - tipc_sk_mcast_rcv(net, &arrvq, &inputq); - return rc; -} -/** - * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet - * - * Called with both sending node's lock and bclink_lock taken. 
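
The receive, ack and sync paths that follow all repeat one locking shape: collect link output into a local queue under the broadcast lock, then do bearer I/O and socket wakeups after dropping it. The skeleton of that shape, isolated for clarity (valid only within bcast.c, where tipc_bc_base() and tipc_bcbase_xmit() are visible; the op name is hypothetical):

static void foo_bc_op_shape(struct net *net, struct tipc_link *l, u32 acked)
{
	struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
	struct sk_buff_head xmitq;

	__skb_queue_head_init(&xmitq);

	tipc_bcast_lock(net);
	tipc_link_bc_ack_rcv(l, acked, &xmitq);	/* link op queues skbs */
	tipc_bcast_unlock(net);

	tipc_bcbase_xmit(net, &xmitq);		/* bearer I/O, lock dropped */

	if (!skb_queue_empty(inputq))		/* congestion-relief wakeups */
		tipc_sk_rcv(net, inputq);
}
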
- */ -static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) -{ - struct tipc_net *tn = net_generic(node->net, tipc_net_id); - - bclink_update_last_sent(node, seqno); - node->bclink.last_in = seqno; - node->bclink.oos_state = 0; - tn->bcl->stats.recv_info++; - - /* - * Unicast an ACK periodically, ensuring that - * all nodes in the cluster don't ACK at the same time - */ - if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) { - tipc_link_proto_xmit(node_active_link(node, node->addr), - STATE_MSG, 0, 0, 0, 0); - tn->bcl->stats.sent_acks++; - } + /* Broadcast to all nodes, inluding local node */ + tipc_bcbase_xmit(net, &xmitq); + tipc_sk_mcast_rcv(net, &rcvq, &inputq); + __skb_queue_purge(list); + return 0; } -/** - * tipc_bclink_rcv - receive a broadcast packet, and deliver upwards +/* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link * * RCU is locked, no other locks set */ -void tipc_bclink_rcv(struct net *net, struct sk_buff *buf) +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - struct tipc_msg *msg = buf_msg(buf); - struct tipc_node *node; - u32 next_in; - u32 seqno; - int deferred = 0; - int pos = 0; - struct sk_buff *iskb; - struct sk_buff_head *arrvq, *inputq; - - /* Screen out unwanted broadcast messages */ - if (msg_mc_netid(msg) != tn->net_id) - goto exit; - - node = tipc_node_find(net, msg_prevnode(msg)); - if (unlikely(!node)) - goto exit; - - tipc_node_lock(node); - if (unlikely(!node->bclink.recv_permitted)) - goto unlock; - - /* Handle broadcast protocol message */ - if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { - if (msg_type(msg) != STATE_MSG) - goto unlock; - if (msg_destnode(msg) == tn->own_addr) { - tipc_bclink_acknowledge(node, msg_bcast_ack(msg)); - tipc_bclink_lock(net); - bcl->stats.recv_nacks++; - tn->bclink->retransmit_to = node; - bclink_retransmit_pkt(tn, msg_bcgap_after(msg), - msg_bcgap_to(msg)); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else { - tipc_node_unlock(node); - bclink_peek_nack(net, msg); - } - tipc_node_put(node); - goto exit; - } - - /* Handle in-sequence broadcast message */ - seqno = msg_seqno(msg); - next_in = mod(node->bclink.last_in + 1); - arrvq = &tn->bclink->arrvq; - inputq = &tn->bclink->inputq; - - if (likely(seqno == next_in)) { -receive: - /* Deliver message to destination */ - if (likely(msg_isdata(msg))) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - spin_lock_bh(&inputq->lock); - __skb_queue_tail(arrvq, buf); - spin_unlock_bh(&inputq->lock); - node->action_flags |= TIPC_BCAST_MSG_EVT; - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else if (msg_user(msg) == MSG_BUNDLER) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - bcl->stats.recv_bundles++; - bcl->stats.recv_bundled += msg_msgcnt(msg); - pos = 0; - while (tipc_msg_extract(buf, &iskb, &pos)) { - spin_lock_bh(&inputq->lock); - __skb_queue_tail(arrvq, iskb); - spin_unlock_bh(&inputq->lock); - } - node->action_flags |= TIPC_BCAST_MSG_EVT; - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else if (msg_user(msg) == MSG_FRAGMENTER) { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_buf_append(&node->bclink.reasm_buf, &buf); - if (unlikely(!buf && !node->bclink.reasm_buf)) { - tipc_bclink_unlock(net); - goto unlock; - } - bcl->stats.recv_fragments++; - if (buf) { - bcl->stats.recv_fragmented++; - msg = buf_msg(buf); - tipc_bclink_unlock(net); - goto 
receive; - } - tipc_bclink_unlock(net); - tipc_node_unlock(node); - } else { - tipc_bclink_lock(net); - bclink_accept_pkt(node, seqno); - tipc_bclink_unlock(net); - tipc_node_unlock(node); - kfree_skb(buf); - } - buf = NULL; + struct tipc_msg *hdr = buf_msg(skb); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; + int rc; - /* Determine new synchronization state */ - tipc_node_lock(node); - if (unlikely(!tipc_node_is_up(node))) - goto unlock; + __skb_queue_head_init(&xmitq); - if (node->bclink.last_in == node->bclink.last_sent) - goto unlock; - - if (skb_queue_empty(&node->bclink.deferdq)) { - node->bclink.oos_state = 1; - goto unlock; - } - - msg = buf_msg(skb_peek(&node->bclink.deferdq)); - seqno = msg_seqno(msg); - next_in = mod(next_in + 1); - if (seqno != next_in) - goto unlock; - - /* Take in-sequence message from deferred queue & deliver it */ - buf = __skb_dequeue(&node->bclink.deferdq); - goto receive; - } - - /* Handle out-of-sequence broadcast message */ - if (less(next_in, seqno)) { - deferred = tipc_link_defer_pkt(&node->bclink.deferdq, - buf); - bclink_update_last_sent(node, seqno); - buf = NULL; + if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { + kfree_skb(skb); + return 0; } - tipc_bclink_lock(net); - - if (deferred) - bcl->stats.deferred_recv++; + tipc_bcast_lock(net); + if (msg_user(hdr) == BCAST_PROTOCOL) + rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); else - bcl->stats.duplicates++; + rc = tipc_link_rcv(l, skb, NULL); + tipc_bcast_unlock(net); - tipc_bclink_unlock(net); + tipc_bcbase_xmit(net, &xmitq); -unlock: - tipc_node_unlock(node); - tipc_node_put(node); -exit: - kfree_skb(buf); -} + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) -{ - return (n_ptr->bclink.recv_permitted && - (tipc_bclink_get_last_sent(n_ptr->net) != n_ptr->bclink.acked)); + return rc; } - -/** - * tipc_bcbearer_send - send a packet through the broadcast pseudo-bearer +/* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge * - * Send packet over as many bearers as necessary to reach all nodes - * that have joined the broadcast link. 
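
The doc comment earlier refers to each buffer's 'ackers' counter; the idea reduced to a hypothetical structure (the real counter lives in link/skb state maintained in link.c, not in this file):

struct foo_bc_pkt {
	struct sk_buff *skb;
	u16 ackers;		/* peers that have not acked this pkt yet */
};

/* Per-peer ack: the packet may be freed once every peer has acked it.
 * A peer that goes down is simply subtracted, as if it had acked. */
static bool foo_bc_pkt_ack(struct foo_bc_pkt *pkt)
{
	return --pkt->ackers == 0;
}
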
- * - * Returns 0 (packet sent successfully) under all circumstances, - * since the broadcast link's pseudo-bearer never blocks + * RCU is locked, no other locks set */ -static int tipc_bcbearer_send(struct net *net, struct sk_buff *buf, - struct tipc_bearer *unused1, - struct tipc_media_addr *unused2) +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) { - int bp_index; - struct tipc_msg *msg = buf_msg(buf); - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer = tn->bcbearer; - struct tipc_bclink *bclink = tn->bclink; - - /* Prepare broadcast link message for reliable transmission, - * if first time trying to send it; - * preparation is skipped for broadcast link protocol messages - * since they are sent in an unreliable manner and don't need it - */ - if (likely(!msg_non_seq(buf_msg(buf)))) { - bcbuf_set_acks(buf, bclink->bcast_nodes.count); - msg_set_non_seq(msg, 1); - msg_set_mc_netid(msg, tn->net_id); - tn->bcl->stats.sent_info++; - if (WARN_ON(!bclink->bcast_nodes.count)) { - dump_stack(); - return 0; - } - } + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - /* Send buffer over bearers until all targets reached */ - bcbearer->remains = bclink->bcast_nodes; - - for (bp_index = 0; bp_index < MAX_BEARERS; bp_index++) { - struct tipc_bearer *p = bcbearer->bpairs[bp_index].primary; - struct tipc_bearer *s = bcbearer->bpairs[bp_index].secondary; - struct tipc_bearer *bp[2] = {p, s}; - struct tipc_bearer *b = bp[msg_link_selector(msg)]; - struct sk_buff *tbuf; - - if (!p) - break; /* No more bearers to try */ - if (!b) - b = p; - tipc_nmap_diff(&bcbearer->remains, &b->nodes, - &bcbearer->remains_new); - if (bcbearer->remains_new.count == bcbearer->remains.count) - continue; /* Nothing added by bearer pair */ - - if (bp_index == 0) { - /* Use original buffer for first bearer */ - tipc_bearer_send(net, b->identity, buf, &b->bcast_addr); - } else { - /* Avoid concurrent buffer access */ - tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC); - if (!tbuf) - break; - tipc_bearer_send(net, b->identity, tbuf, - &b->bcast_addr); - kfree_skb(tbuf); /* Bearer keeps a clone */ - } - if (bcbearer->remains_new.count == 0) - break; /* All targets reached */ + __skb_queue_head_init(&xmitq); - bcbearer->remains = bcbearer->remains_new; - } + tipc_bcast_lock(net); + tipc_link_bc_ack_rcv(l, acked, &xmitq); + tipc_bcast_unlock(net); - return 0; + tipc_bcbase_xmit(net, &xmitq); + + /* Any socket wakeup messages ? 
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); } -/** - * tipc_bcbearer_sort - create sets of bearer pairs used by broadcast bearer +/* tipc_bcast_synch_rcv - check and update rcv link with peer's send state + * + * RCU is locked, no other locks set */ -void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, - u32 node, bool action) +void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer = tn->bcbearer; - struct tipc_bcbearer_pair *bp_temp = bcbearer->bpairs_temp; - struct tipc_bcbearer_pair *bp_curr; - struct tipc_bearer *b; - int b_index; - int pri; - - tipc_bclink_lock(net); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - if (action) - tipc_nmap_add(nm_ptr, node); - else - tipc_nmap_remove(nm_ptr, node); + __skb_queue_head_init(&xmitq); - /* Group bearers by priority (can assume max of two per priority) */ - memset(bp_temp, 0, sizeof(bcbearer->bpairs_temp)); + tipc_bcast_lock(net); + if (msg_type(hdr) == STATE_MSG) { + tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); + tipc_link_bc_sync_rcv(l, hdr, &xmitq); + } else { + tipc_link_bc_init_rcv(l, hdr); + } + tipc_bcast_unlock(net); - rcu_read_lock(); - for (b_index = 0; b_index < MAX_BEARERS; b_index++) { - b = rcu_dereference_rtnl(tn->bearer_list[b_index]); - if (!b || !b->nodes.count) - continue; + tipc_bcbase_xmit(net, &xmitq); - if (!bp_temp[b->priority].primary) - bp_temp[b->priority].primary = b; - else - bp_temp[b->priority].secondary = b; - } - rcu_read_unlock(); + /* Any socket wakeup messages ? */ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); +} - /* Create array of bearer pairs for broadcasting */ - bp_curr = bcbearer->bpairs; - memset(bcbearer->bpairs, 0, sizeof(bcbearer->bpairs)); +/* tipc_bcast_add_peer - add a peer node to broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); - for (pri = TIPC_MAX_LINK_PRI; pri >= 0; pri--) { + tipc_bcast_lock(net); + tipc_link_add_bc_peer(snd_l, uc_l, xmitq); + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); +} - if (!bp_temp[pri].primary) - continue; +/* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer + * + * RCU is locked, node lock is set + */ +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) +{ + struct tipc_link *snd_l = tipc_bc_sndlink(net); + struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + struct sk_buff_head xmitq; - bp_curr->primary = bp_temp[pri].primary; + __skb_queue_head_init(&xmitq); - if (bp_temp[pri].secondary) { - if (tipc_nmap_equal(&bp_temp[pri].primary->nodes, - &bp_temp[pri].secondary->nodes)) { - bp_curr->secondary = bp_temp[pri].secondary; - } else { - bp_curr++; - bp_curr->primary = bp_temp[pri].secondary; - } - } + tipc_bcast_lock(net); + tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); + tipc_bcbase_select_primary(net); + tipc_bcast_unlock(net); - bp_curr++; - } + tipc_bcbase_xmit(net, &xmitq); - tipc_bclink_unlock(net); + /* Any socket wakeup messages ? 
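
The add/remove pair below is what keeps that ackers bookkeeping consistent. A sketch of the intended call sites in the node state machine (foo_* hypothetical; signatures as in this patch):

static void foo_peer_up(struct net *net, struct tipc_link *uc_link,
			struct sk_buff_head *xmitq)
{
	/* couple the peer's bc rcv link to the shared send link */
	tipc_bcast_add_peer(net, uc_link, xmitq);
}

static void foo_peer_down(struct net *net, struct tipc_link *rcv_link)
{
	/* peer stops counting as an acker; pending pkts may free up */
	tipc_bcast_remove_peer(net, rcv_link);
}
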
*/ + if (!skb_queue_empty(inputq)) + tipc_sk_rcv(net, inputq); } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, @@ -836,7 +395,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (!bcl) return 0; - tipc_bclink_lock(net); + tipc_bcast_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); @@ -871,7 +430,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg) if (err) goto attr_msg_full; - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -882,7 +441,7 @@ prop_msg_full: attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; @@ -896,26 +455,25 @@ int tipc_bclink_reset_stats(struct net *net) if (!bcl) return -ENOPROTOOPT; - tipc_bclink_lock(net); + tipc_bcast_lock(net); memset(&bcl->stats, 0, sizeof(bcl->stats)); - tipc_bclink_unlock(net); + tipc_bcast_unlock(net); return 0; } -int tipc_bclink_set_queue_limits(struct net *net, u32 limit) +static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; + struct tipc_link *l = tipc_bc_sndlink(net); - if (!bcl) + if (!l) return -ENOPROTOOPT; if (limit < BCLINK_WIN_MIN) limit = BCLINK_WIN_MIN; if (limit > TIPC_MAX_LINK_WIN) return -EINVAL; - tipc_bclink_lock(net); - tipc_link_set_queue_limits(bcl, limit); - tipc_bclink_unlock(net); + tipc_bcast_lock(net); + tipc_link_set_queue_limits(l, limit); + tipc_bcast_unlock(net); return 0; } @@ -937,123 +495,51 @@ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); - return tipc_bclink_set_queue_limits(net, win); + return tipc_bc_link_set_queue_limits(net, win); } -int tipc_bclink_init(struct net *net) +int tipc_bcast_init(struct net *net) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bcbearer *bcbearer; - struct tipc_bclink *bclink; - struct tipc_link *bcl; - - bcbearer = kzalloc(sizeof(*bcbearer), GFP_ATOMIC); - if (!bcbearer) - return -ENOMEM; - - bclink = kzalloc(sizeof(*bclink), GFP_ATOMIC); - if (!bclink) { - kfree(bcbearer); - return -ENOMEM; - } + struct tipc_net *tn = tipc_net(net); + struct tipc_bc_base *bb = NULL; + struct tipc_link *l = NULL; - bcl = &bclink->link; - bcbearer->bearer.media = &bcbearer->media; - bcbearer->media.send_msg = tipc_bcbearer_send; - sprintf(bcbearer->media.name, "tipc-broadcast"); - - spin_lock_init(&bclink->lock); - __skb_queue_head_init(&bcl->transmq); - __skb_queue_head_init(&bcl->backlogq); - __skb_queue_head_init(&bcl->deferdq); - skb_queue_head_init(&bcl->wakeupq); - bcl->snd_nxt = 1; - spin_lock_init(&bclink->node.lock); - __skb_queue_head_init(&bclink->arrvq); - skb_queue_head_init(&bclink->inputq); - bcl->owner = &bclink->node; - bcl->owner->net = net; - bcl->mtu = MAX_PKT_DEFAULT_MCAST; - tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); - bcl->bearer_id = MAX_BEARERS; - rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer); - bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg; - msg_set_prevnode(bcl->pmsg, tn->own_addr); - strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); - tn->bcbearer = bcbearer; - tn->bclink = bclink; - tn->bcl = bcl; - return 0; -} + bb = kzalloc(sizeof(*bb), GFP_ATOMIC); + if (!bb) + goto enomem; + tn->bcbase = bb; + spin_lock_init(&tipc_net(net)->bclock); -void 
tipc_bclink_stop(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - - tipc_bclink_lock(net); - tipc_link_purge_queues(tn->bcl); - tipc_bclink_unlock(net); - - RCU_INIT_POINTER(tn->bearer_list[BCBEARER], NULL); - synchronize_net(); - kfree(tn->bcbearer); - kfree(tn->bclink); + if (!tipc_link_bc_create(net, 0, 0, + U16_MAX, + BCLINK_WIN_DEFAULT, + 0, + &bb->inputq, + NULL, + NULL, + &l)) + goto enomem; + bb->link = l; + tn->bcl = l; + return 0; +enomem: + kfree(bb); + kfree(l); + return -ENOMEM; } -/** - * tipc_nmap_add - add a node to a node map - */ -static void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node) +void tipc_bcast_reinit(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); + struct tipc_bc_base *b = tipc_bc_base(net); - if ((nm_ptr->map[w] & mask) == 0) { - nm_ptr->count++; - nm_ptr->map[w] |= mask; - } + msg_set_prevnode(b->link->pmsg, tipc_own_addr(net)); } -/** - * tipc_nmap_remove - remove a node from a node map - */ -static void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) +void tipc_bcast_stop(struct net *net) { - int n = tipc_node(node); - int w = n / WSIZE; - u32 mask = (1 << (n % WSIZE)); - - if ((nm_ptr->map[w] & mask) != 0) { - nm_ptr->map[w] &= ~mask; - nm_ptr->count--; - } -} + struct tipc_net *tn = net_generic(net, tipc_net_id); -/** - * tipc_nmap_diff - find differences between node maps - * @nm_a: input node map A - * @nm_b: input node map B - * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) - */ -static void tipc_nmap_diff(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff) -{ - int stop = ARRAY_SIZE(nm_a->map); - int w; - int b; - u32 map; - - memset(nm_diff, 0, sizeof(*nm_diff)); - for (w = 0; w < stop; w++) { - map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]); - nm_diff->map[w] = map; - if (map != 0) { - for (b = 0 ; b < WSIZE; b++) { - if (map & (1 << b)) - nm_diff->count++; - } - } - } + synchronize_net(); + kfree(tn->bcbase); + kfree(tn->bcl); } diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d74c69bcf60b..2855b9356a15 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -37,102 +37,44 @@ #ifndef _TIPC_BCAST_H #define _TIPC_BCAST_H -#include <linux/tipc_config.h> -#include "link.h" -#include "node.h" +#include "core.h" -/** - * struct tipc_bcbearer_pair - a pair of bearers used by broadcast link - * @primary: pointer to primary bearer - * @secondary: pointer to secondary bearer - * - * Bearers must have same priority and same set of reachable destinations - * to be paired. - */ - -struct tipc_bcbearer_pair { - struct tipc_bearer *primary; - struct tipc_bearer *secondary; -}; - -#define BCBEARER MAX_BEARERS - -/** - * struct tipc_bcbearer - bearer used by broadcast link - * @bearer: (non-standard) broadcast bearer structure - * @media: (non-standard) broadcast media structure - * @bpairs: array of bearer pairs - * @bpairs_temp: temporary array of bearer pairs used by tipc_bcbearer_sort() - * @remains: temporary node map used by tipc_bcbearer_send() - * @remains_new: temporary node map used tipc_bcbearer_send() - * - * Note: The fields labelled "temporary" are incorporated into the bearer - * to avoid consuming potentially limited stack space through the use of - * large local variables within multicast routines. Concurrent access is - * prevented through use of the spinlock "bclink_lock". 
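
Lifecycle sketch for the new init/stop pair, assuming the usual per-netns pairing (foo_* hypothetical): tipc_bcast_init() allocates the base structure and broadcast send link, tipc_bcast_stop() waits out RCU readers before freeing.

static int __net_init foo_net_init(struct net *net)
{
	int err;

	err = tipc_bcast_init(net);	/* bcbase + bc send link */
	if (err)
		return err;
	/* ... remaining per-netns setup ... */
	return 0;
}

static void __net_exit foo_net_exit(struct net *net)
{
	tipc_bcast_stop(net);		/* synchronize_net(), then kfree */
}
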
- */ -struct tipc_bcbearer { - struct tipc_bearer bearer; - struct tipc_media media; - struct tipc_bcbearer_pair bpairs[MAX_BEARERS]; - struct tipc_bcbearer_pair bpairs_temp[TIPC_MAX_LINK_PRI + 1]; - struct tipc_node_map remains; - struct tipc_node_map remains_new; -}; +struct tipc_node; +struct tipc_msg; +struct tipc_nl_msg; +struct tipc_node_map; -/** - * struct tipc_bclink - link used for broadcast messages - * @lock: spinlock governing access to structure - * @link: (non-standard) broadcast link structure - * @node: (non-standard) node structure representing b'cast link's peer node - * @bcast_nodes: map of broadcast-capable nodes - * @retransmit_to: node that most recently requested a retransmit - * - * Handles sequence numbering, fragmentation, bundling, etc. - */ -struct tipc_bclink { - spinlock_t lock; - struct tipc_link link; - struct tipc_node node; - struct sk_buff_head arrvq; - struct sk_buff_head inputq; - struct tipc_node_map bcast_nodes; - struct tipc_node *retransmit_to; -}; +int tipc_bcast_init(struct net *net); +void tipc_bcast_reinit(struct net *net); +void tipc_bcast_stop(struct net *net); +void tipc_bcast_add_peer(struct net *net, struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl); +void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id); +void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); +int tipc_bcast_get_mtu(struct net *net); +int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); +int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); +void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); +int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); +int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); +int tipc_bclink_reset_stats(struct net *net); -struct tipc_node; -extern const char tipc_bclink_name[]; +static inline void tipc_bcast_lock(struct net *net) +{ + spin_lock_bh(&tipc_net(net)->bclock); +} -/** - * tipc_nmap_equal - test for equality of node maps - */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, - struct tipc_node_map *nm_b) +static inline void tipc_bcast_unlock(struct net *net) { - return !memcmp(nm_a, nm_b, sizeof(*nm_a)); + spin_unlock_bh(&tipc_net(net)->bclock); } -int tipc_bclink_init(struct net *net); -void tipc_bclink_stop(struct net *net); -void tipc_bclink_add_node(struct net *net, u32 addr); -void tipc_bclink_remove_node(struct net *net, u32 addr); -struct tipc_node *tipc_bclink_retransmit_to(struct net *tn); -void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked); -void tipc_bclink_rcv(struct net *net, struct sk_buff *buf); -u32 tipc_bclink_get_last_sent(struct net *net); -u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); -void tipc_bclink_update_link_state(struct tipc_node *node, - u32 last_sent); -int tipc_bclink_reset_stats(struct net *net); -int tipc_bclink_set_queue_limits(struct net *net, u32 limit); -void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, - u32 node, bool action); -uint tipc_bclink_get_mtu(void); -int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list); -void tipc_bclink_wakeup_users(struct net *net); -int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); -int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); -void tipc_bclink_input(struct net *net); -void 
tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg); +static inline struct tipc_link *tipc_bc_sndlink(struct net *net) +{ + return tipc_net(net)->bcl; +} #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index ce9f7bfc0b92..648f2a67f314 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -193,10 +193,8 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) { - tipc_bcbearer_sort(net, &b_ptr->nodes, dest, true); + if (b_ptr) tipc_disc_add_dest(b_ptr->link_req); - } rcu_read_unlock(); } @@ -207,10 +205,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) rcu_read_lock(); b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (b_ptr) { - tipc_bcbearer_sort(net, &b_ptr->nodes, dest, false); + if (b_ptr) tipc_disc_remove_dest(b_ptr->link_req); - } rcu_read_unlock(); } @@ -362,6 +358,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr) b_ptr->media->disable_media(b_ptr); tipc_node_delete_links(net, b_ptr->identity); + RCU_INIT_POINTER(b_ptr->media_ptr, NULL); if (b_ptr->link_req) tipc_disc_delete(b_ptr->link_req); @@ -399,16 +396,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, /* tipc_disable_l2_media - detach TIPC bearer from an L2 interface * - * Mark L2 bearer as inactive so that incoming buffers are thrown away, - * then get worker thread to complete bearer cleanup. (Can't do cleanup - * here because cleanup code needs to sleep and caller holds spinlocks.) + * Mark L2 bearer as inactive so that incoming buffers are thrown away */ void tipc_disable_l2_media(struct tipc_bearer *b) { struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); - RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); @@ -420,10 +414,9 @@ void tipc_disable_l2_media(struct tipc_bearer *b) * @b_ptr: the bearer through which the packet is to be sent * @dest: peer destination address */ -int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, +int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dest) { - struct sk_buff *clone; struct net_device *dev; int delta; @@ -431,42 +424,48 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, if (!dev) return 0; - clone = skb_clone(buf, GFP_ATOMIC); - if (!clone) - return 0; - - delta = dev->hard_header_len - skb_headroom(buf); + delta = dev->hard_header_len - skb_headroom(skb); if ((delta > 0) && - pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(clone); + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); return 0; } - skb_reset_network_header(clone); - clone->dev = dev; - clone->protocol = htons(ETH_P_TIPC); - dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, - dev->dev_addr, clone->len); - dev_queue_xmit(clone); + skb_reset_network_header(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_TIPC); + dev_hard_header(skb, dev, ETH_P_TIPC, dest->value, + dev->dev_addr, skb->len); + dev_queue_xmit(skb); return 0; } -/* tipc_bearer_send- sends buffer to destination over bearer - * - * IMPORTANT: - * The media send routine must not alter the buffer being passed in - * as it may be needed for later retransmission! 
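The bearer hunks here change buffer ownership: tipc_l2_send_msg() used to clone every buffer because tipc_bearer_send() only borrowed it (callers could still need it for retransmission), while the new tipc_bearer_xmit_skb() consumes the buffer it is handed. Callers that re-send the same message periodically, such as the discovery timer, therefore now clone before transmitting, and tipc_bearer_xmit() loses its compensating kfree_skb(). A toy userspace model of the two conventions (all names here are invented for illustration; this is not kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
	char data[64];
};

static struct buf *buf_clone(const struct buf *b)
{
	struct buf *c = malloc(sizeof(*c));

	if (c)
		memcpy(c, b, sizeof(*c));
	return c;
}

/* Consuming convention: the "bearer" frees whatever it is handed,
 * as tipc_bearer_xmit_skb() does via the driver transmit path.
 */
static void bearer_xmit_skb(struct buf *b)
{
	printf("tx: %s\n", b->data);
	free(b);
}

int main(void)
{
	struct buf *req = malloc(sizeof(*req));
	struct buf *copy;

	if (!req)
		return 1;
	strcpy(req->data, "LINK_REQUEST");

	/* The caller keeps 'req' for the next timer round, so it clones
	 * first, just as disc_timeout() now does with skb_clone().
	 */
	copy = buf_clone(req);
	if (copy)
		bearer_xmit_skb(copy);	/* 'copy' is owned by the callee now */

	bearer_xmit_skb(req);		/* final send hands over 'req' itself */
	return 0;
}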
+int tipc_bearer_mtu(struct net *net, u32 bearer_id) +{ + int mtu = 0; + struct tipc_bearer *b; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tipc_net(net)->bearer_list[bearer_id]); + if (b) + mtu = b->mtu; + rcu_read_unlock(); + return mtu; +} + +/* tipc_bearer_xmit_skb - sends buffer to destination over bearer */ -void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, - struct tipc_media_addr *dest) +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest) { - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_bearer *b_ptr; + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; rcu_read_lock(); - b_ptr = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b_ptr)) - b_ptr->media->send_msg(net, buf, b_ptr, dest); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) + b->media->send_msg(net, skb, b, dest); rcu_read_unlock(); } @@ -489,8 +488,31 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); b->media->send_msg(net, skb, b, dst); - /* Until we remove cloning in tipc_l2_send_msg(): */ - kfree_skb(skb); + } + } + rcu_read_unlock(); +} + +/* tipc_bearer_bc_xmit() - broadcast buffers to all destinations + */ +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq) +{ + struct tipc_net *tn = tipc_net(net); + int net_id = tn->net_id; + struct tipc_bearer *b; + struct sk_buff *skb, *tmp; + struct tipc_msg *hdr; + + rcu_read_lock(); + b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + if (likely(b)) { + skb_queue_walk_safe(xmitq, skb, tmp) { + hdr = buf_msg(skb); + msg_set_non_seq(hdr, 1); + msg_set_mc_netid(hdr, net_id); + __skb_dequeue(xmitq); + b->media->send_msg(net, skb, b, &b->bcast_addr); } } rcu_read_unlock(); @@ -554,7 +576,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, case NETDEV_CHANGE: if (netif_carrier_ok(dev)) break; - case NETDEV_DOWN: + case NETDEV_GOING_DOWN: case NETDEV_CHANGEMTU: tipc_reset_bearer(net, b_ptr); break; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 6426f242f626..552185bc4773 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -163,6 +163,7 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; + int node_cnt; struct tipc_node_map nodes; }; @@ -215,10 +216,14 @@ struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); -void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf, - struct tipc_media_addr *dest); +int tipc_bearer_mtu(struct net *net, u32 bearer_id); +void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, + struct sk_buff *skb, + struct tipc_media_addr *dest); void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst); +void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, + struct sk_buff_head *xmitq); #endif /* _TIPC_BEARER_H */ diff --git a/net/tipc/core.c b/net/tipc/core.c index 005ba5eb0ea4..03a842870c52 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -42,6 +42,7 @@ #include "bearer.h" #include "net.h" #include "socket.h" +#include "bcast.h" #include <linux/module.h> @@ -71,8 +72,15 @@ static int __net_init tipc_init_net(struct net *net) err = tipc_topsrv_start(net); if (err) goto out_subscr; + + err = tipc_bcast_init(net); + if (err) + goto out_bclink; + 
return 0; +out_bclink: + tipc_bcast_stop(net); out_subscr: tipc_nametbl_stop(net); out_nametbl: @@ -85,6 +93,7 @@ static void __net_exit tipc_exit_net(struct net *net) { tipc_topsrv_stop(net); tipc_net_stop(net); + tipc_bcast_stop(net); tipc_nametbl_stop(net); tipc_sk_rht_destroy(net); } diff --git a/net/tipc/core.h b/net/tipc/core.h index b96b41eabf12..18e95a8020cd 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -62,8 +62,7 @@ struct tipc_node; struct tipc_bearer; -struct tipc_bcbearer; -struct tipc_bclink; +struct tipc_bc_base; struct tipc_link; struct tipc_name_table; struct tipc_server; @@ -93,8 +92,8 @@ struct tipc_net { struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; /* Broadcast link */ - struct tipc_bcbearer *bcbearer; - struct tipc_bclink *bclink; + spinlock_t bclock; + struct tipc_bc_base *bcbase; struct tipc_link *bcl; /* Socket hash table */ @@ -114,6 +113,11 @@ static inline struct tipc_net *tipc_net(struct net *net) return net_generic(net, tipc_net_id); } +static inline int tipc_netid(struct net *net) +{ + return tipc_net(net)->net_id; +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index d14e0a4aa9af..afe8c47c4085 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -89,7 +89,7 @@ static void tipc_disc_init_msg(struct net *net, struct sk_buff *buf, u32 type, MAX_H_SIZE, dest_domain); msg_set_non_seq(msg, 1); msg_set_node_sig(msg, tn->random); - msg_set_node_capabilities(msg, 0); + msg_set_node_capabilities(msg, TIPC_NODE_CAPABILITIES); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tn->net_id); b_ptr->media->addr2msg(msg_media_addr(msg), &b_ptr->addr); @@ -167,11 +167,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { rskb = tipc_buf_acquire(MAX_H_SIZE); - if (rskb) { - tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); - tipc_bearer_send(net, bearer->identity, rskb, &maddr); - kfree_skb(rskb); - } + if (!rskb) + return; + tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); + tipc_bearer_xmit_skb(net, bearer->identity, rskb, &maddr); } } @@ -225,6 +224,7 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) static void disc_timeout(unsigned long data) { struct tipc_link_req *req = (struct tipc_link_req *)data; + struct sk_buff *skb; int max_delay; spin_lock_bh(&req->lock); @@ -242,9 +242,9 @@ static void disc_timeout(unsigned long data) * hold at fast polling rate if don't have any associated nodes, * otherwise hold at slow polling rate */ - tipc_bearer_send(req->net, req->bearer_id, req->buf, &req->dest); - - + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(req->net, req->bearer_id, skb, &req->dest); req->timer_intv *= 2; if (req->num_nodes) max_delay = TIPC_LINK_REQ_SLOW; @@ -271,6 +271,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, struct tipc_media_addr *dest) { struct tipc_link_req *req; + struct sk_buff *skb; req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) @@ -292,7 +293,9 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b_ptr, setup_timer(&req->timer, disc_timeout, (unsigned long)req); mod_timer(&req->timer, jiffies + req->timer_intv); b_ptr->link_req = req; - tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); return 0; } @@ -316,6 +319,7 @@ void tipc_disc_delete(struct tipc_link_req *req) void 
tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) { struct tipc_link_req *req = b_ptr->link_req; + struct sk_buff *skb; spin_lock_bh(&req->lock); tipc_disc_init_msg(net, req->buf, DSC_REQ_MSG, b_ptr); @@ -325,6 +329,8 @@ void tipc_disc_reset(struct net *net, struct tipc_bearer *b_ptr) req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; mod_timer(&req->timer, jiffies + req->timer_intv); - tipc_bearer_send(net, req->bearer_id, req->buf, &req->dest); + skb = skb_clone(req->buf, GFP_ATOMIC); + if (skb) + tipc_bearer_xmit_skb(net, req->bearer_id, skb, &req->dest); spin_unlock_bh(&req->lock); } diff --git a/net/tipc/link.c b/net/tipc/link.c index 75db07c78a69..9efbdbde2b08 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -50,6 +50,7 @@ */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; +static const char tipc_bclink_name[] = "broadcast-link"; static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = { [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC }, @@ -75,6 +76,14 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = { [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 } }; +/* Send states for broadcast NACKs + */ +enum { + BC_NACK_SND_CONDITIONAL, + BC_NACK_SND_UNCONDITIONAL, + BC_NACK_SND_SUPPRESS, +}; + /* * Interval between NACKs when packets arrive out of order */ @@ -110,7 +119,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct sk_buff_head *xmitq); static void link_reset_statistics(struct tipc_link *l_ptr); static void link_print(struct tipc_link *l_ptr, const char *str); -static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf); +static void tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); /* * Simple non-static link routines (i.e. 
referenced outside this file) @@ -120,11 +133,21 @@ bool tipc_link_is_up(struct tipc_link *l) return link_is_up(l); } +bool tipc_link_peer_is_down(struct tipc_link *l) +{ + return l->state == LINK_PEER_RESET; +} + bool tipc_link_is_reset(struct tipc_link *l) { return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); } +bool tipc_link_is_establishing(struct tipc_link *l) +{ + return l->state == LINK_ESTABLISHING; +} + bool tipc_link_is_synching(struct tipc_link *l) { return l->state == LINK_SYNCHING; @@ -140,11 +163,66 @@ bool tipc_link_is_blocked(struct tipc_link *l) return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } +static bool link_is_bc_sndlink(struct tipc_link *l) +{ + return !l->bc_sndlink; +} + +static bool link_is_bc_rcvlink(struct tipc_link *l) +{ + return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); +} + int tipc_link_is_active(struct tipc_link *l) { - struct tipc_node *n = l->owner; + return l->active; +} + +void tipc_link_set_active(struct tipc_link *l, bool active) +{ + l->active = active; +} + +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq) +{ + struct tipc_link *rcv_l = uc_l->bc_rcvlink; + + snd_l->ackers++; + rcv_l->acked = snd_l->snd_nxt - 1; + tipc_link_build_bc_init_msg(uc_l, xmitq); +} + +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq) +{ + u16 ack = snd_l->snd_nxt - 1; + + snd_l->ackers--; + tipc_link_bc_ack_rcv(rcv_l, ack, xmitq); + tipc_link_reset(rcv_l); + rcv_l->state = LINK_RESET; + if (!snd_l->ackers) { + tipc_link_reset(snd_l); + __skb_queue_purge(xmitq); + } +} + +int tipc_link_bc_peers(struct tipc_link *l) +{ + return l->ackers; +} + +void tipc_link_set_mtu(struct tipc_link *l, int mtu) +{ + l->mtu = mtu; +} - return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l); +int tipc_link_mtu(struct tipc_link *l) +{ + return l->mtu; } static u32 link_own_addr(struct tipc_link *l) @@ -155,57 +233,72 @@ /** * tipc_link_create - create a new link * @n: pointer to associated node - * @b: pointer to associated bearer + * @if_name: associated interface name + * @bearer_id: id (index) of associated bearer + * @tolerance: link tolerance to be used by link + * @net_plane: network plane (A,B,C..) 
this link belongs to + * @mtu: mtu to be advertised by link + * @priority: priority to be used by link + * @window: send window to be used by link + * @session: session to be used by link * @ownnode: identity of own node - * @peer: identity of peer node - * @maddr: media address to be used + * @peer: node id of peer node + * @peer_caps: bitmap describing peer node capabilities + * @bc_sndlink: the namespace global link used for broadcast sending + * @bc_rcvlink: the peer specific link used for broadcast reception * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * * Returns true if link was created, otherwise false */ -bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, - u32 ownnode, u32 peer, struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, struct sk_buff_head *namedq, +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, u32 peer, + u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, struct tipc_link **link) { struct tipc_link *l; struct tipc_msg *hdr; - char *if_name; l = kzalloc(sizeof(*l), GFP_ATOMIC); if (!l) return false; *link = l; + l->pmsg = (struct tipc_msg *)&l->proto_msg; + hdr = l->pmsg; + tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); + msg_set_size(hdr, sizeof(l->proto_msg)); + msg_set_session(hdr, session); + msg_set_bearer_id(hdr, l->bearer_id); /* Note: peer i/f name is completed by reset/activate message */ - if_name = strchr(b->name, ':') + 1; sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown", tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode), if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer)); + strcpy((char *)msg_data(hdr), if_name); l->addr = peer; - l->media_addr = maddr; - l->owner = n; + l->peer_caps = peer_caps; + l->net = net; l->peer_session = WILDCARD_SESSION; - l->bearer_id = b->identity; - l->tolerance = b->tolerance; - l->net_plane = b->net_plane; - l->advertised_mtu = b->mtu; - l->mtu = b->mtu; - l->priority = b->priority; - tipc_link_set_queue_limits(l, b->window); + l->bearer_id = bearer_id; + l->tolerance = tolerance; + l->net_plane = net_plane; + l->advertised_mtu = mtu; + l->mtu = mtu; + l->priority = priority; + tipc_link_set_queue_limits(l, window); + l->ackers = 1; + l->bc_sndlink = bc_sndlink; + l->bc_rcvlink = bc_rcvlink; l->inputq = inputq; l->namedq = namedq; l->state = LINK_RESETTING; - l->pmsg = (struct tipc_msg *)&l->proto_msg; - hdr = l->pmsg; - tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer); - msg_set_size(hdr, sizeof(l->proto_msg)); - msg_set_session(hdr, session); - msg_set_bearer_id(hdr, l->bearer_id); - strcpy((char *)msg_data(hdr), if_name); __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); @@ -214,27 +307,43 @@ bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, return true; } -/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints. 
+/** + * tipc_link_bc_create - create new link to be used for broadcast + * @net: pointer to namespace struct + * @ownnode: identity of own node + * @peer: node id of peer node + * @mtu: mtu to be used + * @window: send window to be used + * @peer_caps: bitmap describing peer node capabilities + * @inputq: queue to put messages ready for delivery + * @namedq: queue to put binding table update messages ready for delivery + * @bc_sndlink: the namespace global link used for broadcast sending + * @link: return value, pointer to put the created link * - * Give a newly added peer node the sequence number where it should - * start receiving and acking broadcast packets. + * Returns true if link was created, otherwise false */ -void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link) { - struct sk_buff *skb; - struct sk_buff_head list; - u16 last_sent; + struct tipc_link *l; - skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, - 0, l->addr, link_own_addr(l), 0, 0, 0); - if (!skb) - return; - last_sent = tipc_bclink_get_last_sent(l->owner->net); - msg_set_last_bcast(buf_msg(skb), last_sent); - __skb_queue_head_init(&list); - __skb_queue_tail(&list, skb); - tipc_link_xmit(l, &list, xmitq); + if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window, + 0, ownnode, peer, peer_caps, bc_sndlink, + NULL, inputq, namedq, link)) + return false; + + l = *link; + strcpy(l->name, tipc_bclink_name); + tipc_link_reset(l); + l->state = LINK_RESET; + l->ackers = 0; + l->bc_rcvlink = l; + + /* Broadcast send link is always up */ + if (link_is_bc_sndlink(l)) + l->state = LINK_ESTABLISHED; + + return true; } /** @@ -321,14 +430,15 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) switch (evt) { case LINK_ESTABLISH_EVT: l->state = LINK_ESTABLISHED; - rc |= TIPC_LINK_UP_EVT; break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; break; - case LINK_PEER_RESET_EVT: case LINK_RESET_EVT: + l->state = LINK_RESET; + break; case LINK_FAILURE_EVT: + case LINK_PEER_RESET_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_FAILOVER_END_EVT: break; @@ -440,12 +550,17 @@ static void link_profile_stats(struct tipc_link *l) /* tipc_link_timeout - perform periodic task as instructed from node timeout */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int rc = 0; int mtyp = STATE_MSG; bool xmit = false; bool prb = false; + u16 bc_snt = l->bc_sndlink->snd_nxt - 1; + u16 bc_acked = l->bc_rcvlink->acked; + bool bc_up = link_is_up(l->bc_rcvlink); link_profile_stats(l); @@ -453,7 +568,7 @@ case LINK_ESTABLISHED: case LINK_SYNCHING: if (!l->silent_intv_cnt) { - if (tipc_bclink_acks_missing(l->owner)) + if (bc_up && (bc_acked != bc_snt)) xmit = true; } else if (l->silent_intv_cnt <= l->abort_limit) { xmit = true; @@ -544,42 +659,8 @@ void link_prepare_wakeup(struct tipc_link *l) } } -/** - * tipc_link_reset_fragments - purge link's inbound message fragments queue - * @l_ptr: pointer to link - */ -void tipc_link_reset_fragments(struct tipc_link *l_ptr) -{ - kfree_skb(l_ptr->reasm_buf); - l_ptr->reasm_buf = NULL; -} - -void tipc_link_purge_backlog(struct tipc_link *l) -{ - __skb_queue_purge(&l->backlogq); - l->backlog[TIPC_LOW_IMPORTANCE].len = 0; - l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; - l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; - l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; - 
l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; -} - -/** - * tipc_link_purge_queues - purge all pkt queues associated with link - * @l_ptr: pointer to link - */ -void tipc_link_purge_queues(struct tipc_link *l_ptr) -{ - __skb_queue_purge(&l_ptr->deferdq); - __skb_queue_purge(&l_ptr->transmq); - tipc_link_purge_backlog(l_ptr); - tipc_link_reset_fragments(l_ptr); -} - void tipc_link_reset(struct tipc_link *l) { - tipc_link_fsm_evt(l, LINK_RESET_EVT); - /* Link is down, accept any session */ l->peer_session = WILDCARD_SESSION; @@ -589,12 +670,16 @@ void tipc_link_reset(struct tipc_link *l) /* Prepare for renewed mtu size negotiation */ l->mtu = l->advertised_mtu; - /* Clean up all queues: */ + /* Clean up all queues and counters: */ __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); skb_queue_splice_init(&l->wakeupq, l->inputq); - - tipc_link_purge_backlog(l); + __skb_queue_purge(&l->backlogq); + l->backlog[TIPC_LOW_IMPORTANCE].len = 0; + l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; + l->backlog[TIPC_HIGH_IMPORTANCE].len = 0; + l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; + l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; @@ -602,81 +687,15 @@ void tipc_link_reset(struct tipc_link *l) l->rcv_unacked = 0; l->snd_nxt = 1; l->rcv_nxt = 1; + l->acked = 0; l->silent_intv_cnt = 0; l->stats.recv_info = 0; l->stale_count = 0; + l->bc_peer_is_up = false; link_reset_statistics(l); } /** - * __tipc_link_xmit(): same as tipc_link_xmit, but destlink is known & locked - * @link: link to use - * @list: chain of buffers containing message - * - * Consumes the buffer chain, except when returning an error code, - * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS - * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - */ -int __tipc_link_xmit(struct net *net, struct tipc_link *link, - struct sk_buff_head *list) -{ - struct tipc_msg *msg = buf_msg(skb_peek(list)); - unsigned int maxwin = link->window; - unsigned int i, imp = msg_importance(msg); - uint mtu = link->mtu; - u16 ack = mod(link->rcv_nxt - 1); - u16 seqno = link->snd_nxt; - u16 bc_last_in = link->owner->bclink.last_in; - struct tipc_media_addr *addr = link->media_addr; - struct sk_buff_head *transmq = &link->transmq; - struct sk_buff_head *backlogq = &link->backlogq; - struct sk_buff *skb, *bskb; - - /* Match msg importance against this and all higher backlog limits: */ - for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { - if (unlikely(link->backlog[i].len >= link->backlog[i].limit)) - return link_schedule_user(link, list); - } - if (unlikely(msg_size(msg) > mtu)) - return -EMSGSIZE; - - /* Prepare each packet for sending, and add to relevant queue: */ - while (skb_queue_len(list)) { - skb = skb_peek(list); - msg = buf_msg(skb); - msg_set_seqno(msg, seqno); - msg_set_ack(msg, ack); - msg_set_bcast_ack(msg, bc_last_in); - - if (likely(skb_queue_len(transmq) < maxwin)) { - __skb_dequeue(list); - __skb_queue_tail(transmq, skb); - tipc_bearer_send(net, link->bearer_id, skb, addr); - link->rcv_unacked = 0; - seqno++; - continue; - } - if (tipc_msg_bundle(skb_peek_tail(backlogq), msg, mtu)) { - kfree_skb(__skb_dequeue(list)); - link->stats.sent_bundled++; - continue; - } - if (tipc_msg_make_bundle(&bskb, msg, mtu, link->addr)) { - kfree_skb(__skb_dequeue(list)); - __skb_queue_tail(backlogq, bskb); - link->backlog[msg_importance(buf_msg(bskb))].len++; - link->stats.sent_bundled++; - link->stats.sent_bundles++; - continue; - } - link->backlog[imp].len 
+= skb_queue_len(list); - skb_queue_splice_tail_init(list, backlogq); - } - link->snd_nxt = seqno; - return 0; -} - -/** * tipc_link_xmit(): enqueue buffer list according to queue situation * @link: link to use * @list: chain of buffers containing message @@ -696,7 +715,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, unsigned int mtu = l->mtu; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; - u16 bc_last_in = l->owner->bclink.last_in; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct sk_buff_head *transmq = &l->transmq; struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff *skb, *_skb, *bskb; @@ -715,7 +734,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, hdr = buf_msg(skb); msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); - msg_set_bcast_ack(hdr, bc_last_in); + msg_set_bcast_ack(hdr, bc_ack); if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); @@ -724,6 +743,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, __skb_dequeue(list); __skb_queue_tail(transmq, skb); __skb_queue_tail(xmitq, _skb); + TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; seqno++; continue; @@ -748,62 +768,13 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return 0; } -/* - * tipc_link_sync_rcv - synchronize broadcast link endpoints. - * Receive the sequence number where we should start receiving and - * acking broadcast packets from a newly added peer node, and open - * up for reception of such packets. - * - * Called with node locked - */ -static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - - n->bclink.last_sent = n->bclink.last_in = msg_last_bcast(msg); - n->bclink.recv_permitted = true; - kfree_skb(buf); -} - -/* - * tipc_link_push_packets - push unsent packets to bearer - * - * Push out the unsent messages of a link where congestion - * has abated. Node is locked. 
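tipc_link_xmit() keeps the admission policy of the __tipc_link_xmit() variant removed above: a message is refused, and its sender left to wait for a wakeup, when the backlog at its own or any higher importance level is over its limit; otherwise it goes on the wire while fewer than 'window' packets are awaiting ack, and is backlogged after that. A compilable sketch of that policy under invented names and illustrative limits (the real per-level limits are derived from the window in tipc_link_set_queue_limits()):

#include <stdbool.h>
#include <stdio.h>

#define WINDOW		50	/* illustrative; TIPC negotiates this */
#define IMP_LEVELS	5	/* TIPC_LOW_IMPORTANCE..TIPC_SYSTEM_IMPORTANCE */

struct link_model {
	int transmq_len;		/* sent, awaiting ack */
	int backlog_len[IMP_LEVELS];	/* waiting for window space */
	int backlog_lim[IMP_LEVELS];
};

/* Returns true if the message was accepted (sent or backlogged) */
static bool link_xmit(struct link_model *l, int imp)
{
	int i;

	/* Congestion check against this and all higher backlog limits */
	for (i = imp; i < IMP_LEVELS; i++)
		if (l->backlog_len[i] >= l->backlog_lim[i])
			return false;	/* caller is scheduled for wakeup */

	if (l->transmq_len < WINDOW)
		l->transmq_len++;	/* transmitted immediately */
	else
		l->backlog_len[imp]++;	/* advanced later, as acks arrive */
	return true;
}

int main(void)
{
	struct link_model l = { .backlog_lim = { 30, 40, 70, 100, 300 } };
	int accepted = 0, n;

	for (n = 0; n < 100; n++)
		accepted += link_xmit(&l, 0);
	printf("accepted %d, in flight %d, backlogged %d\n",
	       accepted, l.transmq_len, l.backlog_len[0]);	/* 80, 50, 30 */
	return 0;
}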
- * - * Called with node locked - */ -void tipc_link_push_packets(struct tipc_link *link) -{ - struct sk_buff *skb; - struct tipc_msg *msg; - u16 seqno = link->snd_nxt; - u16 ack = mod(link->rcv_nxt - 1); - - while (skb_queue_len(&link->transmq) < link->window) { - skb = __skb_dequeue(&link->backlogq); - if (!skb) - break; - msg = buf_msg(skb); - link->backlog[msg_importance(msg)].len--; - msg_set_ack(msg, ack); - msg_set_seqno(msg, seqno); - seqno = mod(seqno + 1); - msg_set_bcast_ack(msg, link->owner->bclink.last_in); - link->rcv_unacked = 0; - __skb_queue_tail(&link->transmq, skb); - tipc_bearer_send(link->owner->net, link->bearer_id, - skb, link->media_addr); - } - link->snd_nxt = seqno; -} - void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { struct sk_buff *skb, *_skb; struct tipc_msg *hdr; u16 seqno = l->snd_nxt; u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; while (skb_queue_len(&l->transmq) < l->window) { skb = skb_peek(&l->backlogq); @@ -817,96 +788,35 @@ void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) l->backlog[msg_importance(hdr)].len--; __skb_queue_tail(&l->transmq, skb); __skb_queue_tail(xmitq, _skb); - msg_set_ack(hdr, ack); + TIPC_SKB_CB(skb)->ackers = l->ackers; msg_set_seqno(hdr, seqno); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); l->rcv_unacked = 0; seqno++; } l->snd_nxt = seqno; } -static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - struct net *net = l_ptr->owner->net; - - pr_warn("Retransmission failure on link <%s>\n", l_ptr->name); - - if (l_ptr->addr) { - /* Handle failure on standard link */ - link_print(l_ptr, "Resetting link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(msg), msg_type(msg), msg_size(msg), - msg_errcode(msg)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg)); - } else { - /* Handle failure on broadcast link */ - struct tipc_node *n_ptr; - char addr_string[16]; - - pr_info("Msg seq number: %u, ", msg_seqno(msg)); - pr_cont("Outstanding acks: %lu\n", - (unsigned long) TIPC_SKB_CB(buf)->handle); - - n_ptr = tipc_bclink_retransmit_to(net); - - tipc_addr_string_fill(addr_string, n_ptr->addr); - pr_info("Broadcast link info for %s\n", addr_string); - pr_info("Reception permitted: %d, Acked: %u\n", - n_ptr->bclink.recv_permitted, - n_ptr->bclink.acked); - pr_info("Last in: %u, Oos state: %u, Last sent: %u\n", - n_ptr->bclink.last_in, - n_ptr->bclink.oos_state, - n_ptr->bclink.last_sent); - - n_ptr->action_flags |= TIPC_BCAST_RESET; - l_ptr->stale_count = 0; - } -} - -void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb, - u32 retransmits) +static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) { - struct tipc_msg *msg; - - if (!skb) - return; - - msg = buf_msg(skb); - - /* Detect repeated retransmit failures */ - if (l_ptr->last_retransm == msg_seqno(msg)) { - if (++l_ptr->stale_count > 100) { - link_retransmit_failure(l_ptr, skb); - return; - } - } else { - l_ptr->last_retransm = msg_seqno(msg); - l_ptr->stale_count = 1; - } + struct tipc_msg *hdr = buf_msg(skb); - skb_queue_walk_from(&l_ptr->transmq, skb) { - if (!retransmits) - break; - msg = buf_msg(skb); - msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1)); - msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); - tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, 
skb, - l_ptr->media_addr); - retransmits--; - l_ptr->stats.retransmitted++; - } + pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "Resetting link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, src: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); } -static int tipc_link_retransm(struct tipc_link *l, int retransm, - struct sk_buff_head *xmitq) +int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, + struct sk_buff_head *xmitq) { struct sk_buff *_skb, *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; + u16 ack = l->rcv_nxt - 1; + u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; if (!skb) return 0; @@ -919,19 +829,25 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, link_retransmit_failure(l, skb); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } + + /* Move forward to where retransmission should start */ skb_queue_walk(&l->transmq, skb) { - if (!retransm) - return 0; + if (!less(buf_seqno(skb), from)) + break; + } + + skb_queue_walk_from(&l->transmq, skb) { + if (more(buf_seqno(skb), to)) + break; hdr = buf_msg(skb); _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; hdr = buf_msg(_skb); - msg_set_ack(hdr, l->rcv_nxt - 1); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); + msg_set_ack(hdr, ack); + msg_set_bcast_ack(hdr, bc_ack); _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); - retransm--; l->stats.retransmitted++; } return 0; @@ -942,22 +858,20 @@ static int tipc_link_retransm(struct tipc_link *l, int retransm, * Consumes buffer if message is of right type * Node lock must be held */ -static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, +static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { - struct tipc_node *node = link->owner; - switch (msg_user(buf_msg(skb))) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: case CONN_MANAGER: - __skb_queue_tail(inputq, skb); + skb_queue_tail(inputq, skb); return true; case NAME_DISTRIBUTOR: - node->bclink.recv_permitted = true; - skb_queue_tail(link->namedq, skb); + l->bc_rcvlink->state = LINK_ESTABLISHED; + skb_queue_tail(l->namedq, skb); return true; case MSG_BUNDLER: case TUNNEL_PROTOCOL: @@ -978,10 +892,10 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb, static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { - struct tipc_node *node = l->owner; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff **reasm_skb = &l->reasm_buf; struct sk_buff *iskb; + struct sk_buff_head tmpq; int usr = msg_user(hdr); int rc = 0; int pos = 0; @@ -1006,23 +920,27 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, } if (usr == MSG_BUNDLER) { + skb_queue_head_init(&tmpq); l->stats.recv_bundles++; l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) - tipc_data_input(l, iskb, inputq); + tipc_data_input(l, iskb, &tmpq); + tipc_skb_queue_splice_tail(&tmpq, inputq); return 0; } else if (usr == MSG_FRAGMENTER) { l->stats.recv_fragments++; if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; tipc_data_input(l, skb, inputq); - } else if (!*reasm_skb) { + } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { + pr_warn_ratelimited("Unable to build fragment list\n"); return tipc_link_fsm_evt(l, 
LINK_FAILURE_EVT); } return 0; } else if (usr == BCAST_PROTOCOL) { - tipc_link_sync_rcv(node, skb); - return 0; + tipc_bcast_lock(l->net); + tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); + tipc_bcast_unlock(l->net); } drop: kfree_skb(skb); @@ -1044,49 +962,95 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked) return released; } +/* tipc_link_build_ack_msg: prepare link acknowledge message for transmission + * + * Note that sending of broadcast ack is coordinated among nodes, to reduce + * risk of ack storms towards the sender + */ +int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + if (!l) + return 0; + + /* Broadcast ACK must be sent via a unicast link => defer to caller */ + if (link_is_bc_rcvlink(l)) { + if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf) + return 0; + l->rcv_unacked = 0; + return TIPC_LINK_SND_BC_ACK; + } + + /* Unicast ACK */ + l->rcv_unacked = 0; + l->stats.sent_acks++; + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + return 0; +} + +/* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message + */ +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) +{ + int mtyp = RESET_MSG; + + if (l->state == LINK_ESTABLISHING) + mtyp = ACTIVATE_MSG; + + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); +} + +/* tipc_link_build_nack_msg: prepare link nack message for transmission + */ +static void tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + u32 def_cnt = ++l->stats.deferred_recv; + + if (link_is_bc_rcvlink(l)) + return; + + if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); +} + /* tipc_link_rcv - process TIPC packets/messages arriving from off-node - * @link: the link that should handle the message + * @l: the link that should handle the message * @skb: TIPC packet * @xmitq: queue to place packets to be sent after this call */ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { - struct sk_buff_head *arrvq = &l->deferdq; - struct sk_buff_head tmpq; + struct sk_buff_head *defq = &l->deferdq; struct tipc_msg *hdr; - u16 seqno, rcv_nxt; + u16 seqno, rcv_nxt, win_lim; int rc = 0; - __skb_queue_head_init(&tmpq); - - if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) { - if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, - 0, 0, 0, xmitq); - return rc; - } - - while ((skb = skb_peek(arrvq))) { + do { hdr = buf_msg(skb); + seqno = msg_seqno(hdr); + rcv_nxt = l->rcv_nxt; + win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; /* Verify and update link state */ - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) { - __skb_dequeue(arrvq); - rc = tipc_link_proto_rcv(l, skb, xmitq); - continue; - } + if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) + return tipc_link_proto_rcv(l, skb, xmitq); if (unlikely(!link_is_up(l))) { - rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - if (!link_is_up(l)) { - kfree_skb(__skb_dequeue(arrvq)); - goto exit; - } + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; + goto drop; } + /* Don't send probe at next timeout expiration */ l->silent_intv_cnt = 0; + /* Drop if outside receive window */ + if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { + l->stats.duplicates++; + goto drop; + } + /* Forward queues and wake up waiting users */ if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) { tipc_link_advance_backlog(l, xmitq); @@ -1094,79 +1058,28 @@ int 
tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, link_prepare_wakeup(l); } - /* Defer reception if there is a gap in the sequence */ - seqno = msg_seqno(hdr); - rcv_nxt = l->rcv_nxt; - if (unlikely(less(rcv_nxt, seqno))) { - l->stats.deferred_recv++; - goto exit; - } - - __skb_dequeue(arrvq); - - /* Drop if packet already received */ - if (unlikely(more(rcv_nxt, seqno))) { - l->stats.duplicates++; - kfree_skb(skb); - goto exit; + /* Defer delivery if sequence gap */ + if (unlikely(seqno != rcv_nxt)) { + __tipc_skb_queue_sorted(defq, seqno, skb); + tipc_link_build_nack_msg(l, xmitq); + break; } - /* Packet can be delivered */ + /* Deliver packet */ l->rcv_nxt++; l->stats.recv_info++; - if (unlikely(!tipc_data_input(l, skb, &tmpq))) - rc = tipc_link_input(l, skb, &tmpq); - - /* Ack at regular intervals */ - if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) { - l->rcv_unacked = 0; - l->stats.sent_acks++; - tipc_link_build_proto_msg(l, STATE_MSG, - 0, 0, 0, 0, xmitq); - } - } -exit: - tipc_skb_queue_splice_tail(&tmpq, l->inputq); - return rc; -} - -/** - * tipc_link_defer_pkt - Add out-of-sequence message to deferred reception queue - * - * Returns increase in queue length (i.e. 0 or 1) - */ -u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb) -{ - struct sk_buff *skb1; - u16 seq_no = buf_seqno(skb); - - /* Empty queue ? */ - if (skb_queue_empty(list)) { - __skb_queue_tail(list, skb); - return 1; - } - - /* Last ? */ - if (less(buf_seqno(skb_peek_tail(list)), seq_no)) { - __skb_queue_tail(list, skb); - return 1; - } - - /* Locate insertion point in queue, then insert; discard if duplicate */ - skb_queue_walk(list, skb1) { - u16 curr_seqno = buf_seqno(skb1); - - if (seq_no == curr_seqno) { - kfree_skb(skb); - return 0; - } - - if (less(seq_no, curr_seqno)) + if (!tipc_data_input(l, skb, l->inputq)) + rc |= tipc_link_input(l, skb, l->inputq); + if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) + rc |= tipc_link_build_ack_msg(l, xmitq); + if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) break; - } + } while ((skb = __skb_dequeue(defq))); - __skb_queue_before(list, skb1, skb); - return 1; + return rc; +drop: + kfree_skb(skb); + return rc; } /* @@ -1184,23 +1097,17 @@ void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg, skb = __skb_dequeue(&xmitq); if (!skb) return; - tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr); + tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); l->rcv_unacked = 0; - kfree_skb(skb); } -/* tipc_link_build_proto_msg: prepare link protocol message for transmission - */ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { struct sk_buff *skb = NULL; struct tipc_msg *hdr = l->pmsg; - u16 snd_nxt = l->snd_nxt; - u16 rcv_nxt = l->rcv_nxt; - u16 rcv_last = rcv_nxt - 1; - int node_up = l->owner->bclink.recv_permitted; + bool node_up = link_is_up(l->bc_rcvlink); /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1208,33 +1115,34 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_type(hdr, mtyp); msg_set_net_plane(hdr, l->net_plane); - msg_set_bcast_ack(hdr, l->owner->bclink.last_in); - msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net)); + msg_set_next_sent(hdr, l->snd_nxt); + msg_set_ack(hdr, l->rcv_nxt - 1); + msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt 
- 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); msg_set_redundant_link(hdr, node_up); msg_set_seq_gap(hdr, 0); /* Compatibility: created msg must not be in sequence with pkt flow */ - msg_set_seqno(hdr, snd_nxt + U16_MAX / 2); + msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); if (mtyp == STATE_MSG) { if (!tipc_link_is_up(l)) return; - msg_set_next_sent(hdr, snd_nxt); /* Override rcvgap if there are packets in deferred queue */ if (!skb_queue_empty(&l->deferdq)) - rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt; + rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt; if (rcvgap) { msg_set_seq_gap(hdr, rcvgap); l->stats.sent_nacks++; } - msg_set_ack(hdr, rcv_last); msg_set_probe(hdr, probe); if (probe) l->stats.sent_probes++; l->stats.sent_states++; + l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_max_pkt(hdr, l->advertised_mtu); @@ -1250,7 +1158,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, } /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets - * with contents of the link's tranmsit and backlog queues. + * with contents of the link's transmit and backlog queues. */ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) @@ -1326,21 +1234,23 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, { struct tipc_msg *hdr = buf_msg(skb); u16 rcvgap = 0; - u16 nacked_gap = msg_seq_gap(hdr); + u16 ack = msg_ack(hdr); + u16 gap = msg_seq_gap(hdr); u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 rcv_nxt = l->rcv_nxt; + int mtyp = msg_type(hdr); char *if_name; int rc = 0; - if (tipc_link_is_blocked(l)) + if (tipc_link_is_blocked(l) || !xmitq) goto exit; if (link_own_addr(l) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - switch (msg_type(hdr)) { + switch (mtyp) { case RESET_MSG: /* Ignore duplicate RESET with old session number */ @@ -1367,12 +1277,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; - if (msg_type(hdr) == RESET_MSG) { - rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); - } else if (!link_is_up(l)) { - tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); - rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - } + /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ + if ((mtyp == RESET_MSG) || !link_is_up(l)) + rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); + + /* ACTIVATE_MSG takes up link if it was already locally reset */ + if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING)) + rc = TIPC_LINK_UP_EVT; + l->peer_session = msg_session(hdr); l->peer_bearer_id = msg_bearer_id(hdr); if (l->mtu > msg_max_pkt(hdr)) @@ -1389,9 +1301,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, l->stats.recv_states++; if (msg_probe(hdr)) l->stats.recv_probes++; - rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); - if (!link_is_up(l)) + + if (!link_is_up(l)) { + if (l->state == LINK_ESTABLISHING) + rc = TIPC_LINK_UP_EVT; break; + } /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) @@ -1399,11 +1314,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (rcvgap || (msg_probe(hdr))) tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, 0, 0, xmitq); - tipc_link_release_pkts(l, msg_ack(hdr)); + 
tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ - if (nacked_gap) { - rc = tipc_link_retransm(l, nacked_gap, xmitq); + if (gap) { + rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq); l->stats.recv_nacks++; } @@ -1416,6 +1331,188 @@ exit: return rc; } +/* tipc_link_build_bc_proto_msg() - create broadcast protocol message + */ +static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, + u16 peers_snd_nxt, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + struct tipc_msg *hdr; + struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); + u16 ack = l->rcv_nxt - 1; + u16 gap_to = peers_snd_nxt - 1; + + skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, + 0, l->addr, link_own_addr(l), 0, 0, 0); + if (!skb) + return false; + hdr = buf_msg(skb); + msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); + msg_set_bcast_ack(hdr, ack); + msg_set_bcgap_after(hdr, ack); + if (dfrd_skb) + gap_to = buf_seqno(dfrd_skb) - 1; + msg_set_bcgap_to(hdr, gap_to); + msg_set_non_seq(hdr, bcast); + __skb_queue_tail(xmitq, skb); + return true; +} + +/* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. + * + * Give a newly added peer node the sequence number where it should + * start receiving and acking broadcast packets. + */ +static void tipc_link_build_bc_init_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) +{ + struct sk_buff_head list; + + __skb_queue_head_init(&list); + if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) + return; + tipc_link_xmit(l, &list, xmitq); +} + +/* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer + */ +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) +{ + int mtyp = msg_type(hdr); + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (link_is_up(l)) + return; + + if (msg_user(hdr) == BCAST_PROTOCOL) { + l->rcv_nxt = peers_snd_nxt; + l->state = LINK_ESTABLISHED; + return; + } + + if (l->peer_caps & TIPC_BCAST_SYNCH) + return; + + if (msg_peer_node_is_up(hdr)) + return; + + /* Compatibility: accept older, less safe initial synch data */ + if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) + l->rcv_nxt = peers_snd_nxt; +} + +/* tipc_link_bc_sync_rcv - update rcv link according to peer's send state + */ +void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) +{ + u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + + if (!link_is_up(l)) + return; + + if (!msg_peer_node_is_up(hdr)) + return; + + l->bc_peer_is_up = true; + + /* Ignore if peers_snd_nxt goes beyond receive window */ + if (more(peers_snd_nxt, l->rcv_nxt + l->window)) + return; + + if (!more(peers_snd_nxt, l->rcv_nxt)) { + l->nack_state = BC_NACK_SND_CONDITIONAL; + return; + } + + /* Don't NACK if one was recently sent or peeked */ + if (l->nack_state == BC_NACK_SND_SUPPRESS) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + return; + } + + /* Conditionally delay NACK sending until next synch rcv */ + if (l->nack_state == BC_NACK_SND_CONDITIONAL) { + l->nack_state = BC_NACK_SND_UNCONDITIONAL; + if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) + return; + } + + /* Send NACK now but suppress next one */ + tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); + l->nack_state = BC_NACK_SND_SUPPRESS; +} + +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb, *tmp; + struct tipc_link *snd_l = l->bc_sndlink; + + if (!link_is_up(l) || !l->bc_peer_is_up) + return; + + if (!more(acked, 
l->acked)) + return; + + /* Skip over packets peer has already acked */ + skb_queue_walk(&snd_l->transmq, skb) { + if (more(buf_seqno(skb), l->acked)) + break; + } + + /* Update/release the packets peer is acking now */ + skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) { + if (more(buf_seqno(skb), acked)) + break; + if (!--TIPC_SKB_CB(skb)->ackers) { + __skb_unlink(skb, &snd_l->transmq); + kfree_skb(skb); + } + } + l->acked = acked; + tipc_link_advance_backlog(snd_l, xmitq); + if (unlikely(!skb_queue_empty(&snd_l->wakeupq))) + link_prepare_wakeup(snd_l); +} + +/* tipc_link_bc_nack_rcv(): receive broadcast nack message + */ +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + u32 dnode = msg_destnode(hdr); + int mtyp = msg_type(hdr); + u16 acked = msg_bcast_ack(hdr); + u16 from = acked + 1; + u16 to = msg_bcgap_to(hdr); + u16 peers_snd_nxt = to + 1; + int rc = 0; + + kfree_skb(skb); + + if (!tipc_link_is_up(l) || !l->bc_peer_is_up) + return 0; + + if (mtyp != STATE_MSG) + return 0; + + if (dnode == link_own_addr(l)) { + tipc_link_bc_ack_rcv(l, acked, xmitq); + rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); + l->stats.recv_nacks++; + return rc; + } + + /* Msg for other node => suppress own NACK at next sync if applicable */ + if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) + l->nack_state = BC_NACK_SND_SUPPRESS; + + return 0; +} + void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) { int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); @@ -1480,7 +1577,7 @@ static void link_reset_statistics(struct tipc_link *l_ptr) static void link_print(struct tipc_link *l, const char *str) { struct sk_buff *hskb = skb_peek(&l->transmq); - u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt; + u16 head = hskb ? 
msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; u16 tail = l->snd_nxt - 1; pr_info("%s Link <%s> state %x\n", str, l->name, l->state); @@ -1704,7 +1801,7 @@ static int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, if (tipc_link_is_up(link)) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; - if (tipc_link_is_active(link)) + if (link->active) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; diff --git a/net/tipc/link.h b/net/tipc/link.h index 39ff8b6919a4..66d859b66c84 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -66,7 +66,8 @@ enum { */ enum { TIPC_LINK_UP_EVT = 1, - TIPC_LINK_DOWN_EVT = (1 << 1) + TIPC_LINK_DOWN_EVT = (1 << 1), + TIPC_LINK_SND_BC_ACK = (1 << 2) }; /* Starting value for maximum packet size negotiation on unicast links @@ -110,7 +111,7 @@ struct tipc_stats { * @name: link name character string * @media_addr: media address to use when sending messages over link * @timer: link timer - * @owner: pointer to peer node + * @net: pointer to namespace struct * @refcnt: reference counter for permanent references (owner node & timer) * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint @@ -119,6 +120,7 @@ struct tipc_stats { * @keepalive_intv: link keepalive timer interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM + * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer * @proto_msg: template for control messages generated by link * @pmsg: convenience pointer to "proto_msg" field @@ -134,6 +136,8 @@ struct tipc_stats { * @snt_nxt: next sequence number to use for outbound messages * @last_retransmitted: sequence number of most recently retransmitted message * @stale_count: # of identical retransmit requests made by peer + * @ackers: # of peers that need to ack each packet before it can be released + * @acked: # last packet acked by a certain peer. Used for broadcast. 
* @rcv_nxt: next sequence number to expect for inbound messages * @deferred_queue: deferred queue saved OOS b'cast message received from node * @unacked_window: # of inbound messages rx'd without ack'ing back to peer @@ -143,13 +147,14 @@ struct tipc_stats { * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @long_msg_seq_no: next identifier to use for outbound fragmented messages * @reasm_buf: head of partially reassembled inbound message fragments + * @bc_rcvr: marks that this is a broadcast receiver link * @stats: collects statistics regarding link activity */ struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct tipc_media_addr *media_addr; - struct tipc_node *owner; + struct net *net; /* Management and link supervision data */ u32 peer_session; @@ -159,6 +164,8 @@ struct tipc_link { unsigned long keepalive_intv; u32 abort_limit; u32 state; + u16 peer_caps; + bool active; u32 silent_intv_cnt; struct { unchar hdr[INT_H_SIZE]; @@ -185,7 +192,7 @@ struct tipc_link { } backlog[5]; u16 snd_nxt; u16 last_retransm; - u32 window; + u16 window; u32 stale_count; /* Reception */ @@ -201,42 +208,50 @@ struct tipc_link { /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + /* Broadcast */ + u16 ackers; + u16 acked; + struct tipc_link *bc_rcvlink; + struct tipc_link *bc_sndlink; + int nack_state; + bool bc_peer_is_up; + /* Statistics */ struct tipc_stats stats; }; -bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session, - u32 ownnode, u32 peer, struct tipc_media_addr *maddr, - struct sk_buff_head *inputq, struct sk_buff_head *namedq, +bool tipc_link_create(struct net *net, char *if_name, int bearer_id, + int tolerance, char net_plane, u32 mtu, int priority, + int window, u32 session, u32 ownnode, u32 peer, + u16 peer_caps, + struct tipc_link *bc_sndlink, + struct tipc_link *bc_rcvlink, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, struct tipc_link **link); +bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, + int mtu, int window, u16 peer_caps, + struct sk_buff_head *inputq, + struct sk_buff_head *namedq, + struct tipc_link *bc_sndlink, + struct tipc_link **link); void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq); -void tipc_link_build_bcast_sync_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); +void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_fsm_evt(struct tipc_link *l, int evt); void tipc_link_reset_fragments(struct tipc_link *l_ptr); bool tipc_link_is_up(struct tipc_link *l); +bool tipc_link_peer_is_down(struct tipc_link *l); bool tipc_link_is_reset(struct tipc_link *l); +bool tipc_link_is_establishing(struct tipc_link *l); bool tipc_link_is_synching(struct tipc_link *l); bool tipc_link_is_failingover(struct tipc_link *l); bool tipc_link_is_blocked(struct tipc_link *l); -int tipc_link_is_active(struct tipc_link *l_ptr); -void tipc_link_purge_queues(struct tipc_link *l_ptr); -void tipc_link_purge_backlog(struct tipc_link *l); +void tipc_link_set_active(struct tipc_link *l, bool active); void tipc_link_reset(struct tipc_link *l_ptr); -int __tipc_link_xmit(struct net *net, struct tipc_link *link, - struct sk_buff_head *list); int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list, struct sk_buff_head *xmitq); -void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob, - u32 gap, u32 tolerance, u32 priority); -void tipc_link_push_packets(struct tipc_link *l_ptr); 
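The receive-window test in tipc_link_rcv() above, less(seqno, rcv_nxt) || more(seqno, win_lim), and the [from, to] range walked by tipc_link_retrans() both rely on sequence comparison modulo 2^16, built on the mod() helper this patch adds to core.h. A self-contained sketch of that arithmetic; the helper bodies mirror TIPC's less()/more(), and the test values are arbitrary:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static inline uint16_t mod16(uint16_t x)
{
	return x & 0xffffu;	/* identity on u16; kept to mirror mod() */
}

static inline bool less_eq(uint16_t left, uint16_t right)
{
	/* "left <= right" iff the forward distance fits in half the space */
	return mod16(right - left) < 32768u;
}

static inline bool less(uint16_t left, uint16_t right)
{
	return less_eq(left, right) && (left != right);
}

static inline bool more(uint16_t left, uint16_t right)
{
	return !less_eq(left, right);
}

int main(void)
{
	assert(less(1, 2));
	assert(more(2, 1));
	assert(less(65535, 3));		/* still ordered across the wrap */
	assert(!less(3, 65535));	/* 65535 is "behind" 3, not ahead */
	assert(more(0, 65000));		/* 0 is 536 steps ahead of 65000 */
	return 0;
}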
-u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *buf); -void tipc_link_set_queue_limits(struct tipc_link *l_ptr, u32 window); -void tipc_link_retransmit(struct tipc_link *l_ptr, - struct sk_buff *start, u32 retransmits); -struct sk_buff *tipc_skb_queue_next(const struct sk_buff_head *list, - const struct sk_buff *skb); +void tipc_link_set_queue_limits(struct tipc_link *l, u32 window); int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info); @@ -246,5 +261,23 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]); int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq); int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); - +int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); +void tipc_link_add_bc_peer(struct tipc_link *snd_l, + struct tipc_link *uc_l, + struct sk_buff_head *xmitq); +void tipc_link_remove_bc_peer(struct tipc_link *snd_l, + struct tipc_link *rcv_l, + struct sk_buff_head *xmitq); +int tipc_link_bc_peers(struct tipc_link *l); +void tipc_link_set_mtu(struct tipc_link *l, int mtu); +int tipc_link_mtu(struct tipc_link *l); +void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, + struct sk_buff_head *xmitq); +void tipc_link_build_bc_sync_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); +void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); +void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); +int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, + struct sk_buff_head *xmitq); #endif diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5f73450159df..8740930f0787 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -182,7 +182,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *buf = NULL; return 0; err: - pr_warn_ratelimited("Unable to build fragment list\n"); kfree_skb(*buf); kfree_skb(*headbuf); *buf = *headbuf = NULL; @@ -565,18 +564,22 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) /* tipc_msg_reassemble() - clone a buffer chain of fragments and * reassemble the clones into one message */ -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq) { - struct sk_buff *skb; + struct sk_buff *skb, *_skb; struct sk_buff *frag = NULL; struct sk_buff *head = NULL; - int hdr_sz; + int hdr_len; /* Copy header if single buffer */ if (skb_queue_len(list) == 1) { skb = skb_peek(list); - hdr_sz = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); - return __pskb_copy(skb, hdr_sz, GFP_ATOMIC); + hdr_len = skb_headroom(skb) + msg_hdr_sz(buf_msg(skb)); + _skb = __pskb_copy(skb, hdr_len, GFP_ATOMIC); + if (!_skb) + return false; + __skb_queue_tail(rcvq, _skb); + return true; } /* Clone all fragments and reassemble */ @@ -590,9 +593,41 @@ struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list) if (!head) goto error; } - return frag; + __skb_queue_tail(rcvq, frag); + return true; error: pr_warn("Failed do clone local mcast rcv buffer\n"); kfree_skb(head); - return NULL; + return false; +} + +/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number + * @list: list to be appended to + * @seqno: sequence number of buffer to add + * @skb: buffer to add + */ +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb) +{ + struct 
sk_buff *_skb, *tmp; + + if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) { + __skb_queue_head(list, skb); + return; + } + + if (more(seqno, buf_seqno(skb_peek_tail(list)))) { + __skb_queue_tail(list, skb); + return; + } + + skb_queue_walk_safe(list, _skb, tmp) { + if (more(seqno, buf_seqno(_skb))) + continue; + if (seqno == buf_seqno(_skb)) + break; + __skb_queue_before(list, _skb, skb); + return; + } + kfree_skb(skb); } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 5351a3f97e8e..55778a0aebf3 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -112,6 +112,7 @@ struct tipc_skb_cb { bool wakeup_pending; u16 chain_sz; u16 chain_imp; + u16 ackers; }; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) @@ -600,6 +601,11 @@ static inline u32 msg_last_bcast(struct tipc_msg *m) return msg_bits(m, 4, 16, 0xffff); } +static inline u32 msg_bc_snd_nxt(struct tipc_msg *m) +{ + return msg_last_bcast(m) + 1; +} + static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); @@ -789,7 +795,9 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); -struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list); +bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); +void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, + struct sk_buff *skb); static inline u16 buf_seqno(struct sk_buff *skb) { @@ -862,38 +870,6 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, return skb; } -/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number - * @list: list to be appended to - * @skb: buffer to add - * Returns true if queue should treated further, otherwise false - */ -static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list, - struct sk_buff *skb) -{ - struct sk_buff *_skb, *tmp; - struct tipc_msg *hdr = buf_msg(skb); - u16 seqno = msg_seqno(hdr); - - if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) { - __skb_queue_head(list, skb); - return true; - } - if (likely(less(seqno, buf_seqno(skb_peek(list))))) { - __skb_queue_head(list, skb); - return true; - } - if (!more(seqno, buf_seqno(skb_peek_tail(list)))) { - skb_queue_walk_safe(list, _skb, tmp) { - if (likely(less(seqno, buf_seqno(_skb)))) { - __skb_queue_before(list, _skb, skb); - return true; - } - } - } - __skb_queue_tail(list, skb); - return false; -} - /* tipc_skb_queue_splice_tail - append an skb list to lock protected list * @list: the new list to append. Not lock protected * @head: target list. Lock protected. 
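The new __tipc_skb_queue_sorted() above depends on the less()/more() helpers declared elsewhere in msg.h; they are assumed here to compare 16-bit sequence numbers modulo 2^16 so that ordering stays correct across wraparound. A standalone sketch of that comparison style (illustrative only, not part of the patch; the names are ours):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* 'left' precedes 'right' when the forward distance from left to
 * right is non-zero and less than half of the 16-bit sequence space.
 */
static bool seqno_less(uint16_t left, uint16_t right)
{
	return left != right && (uint16_t)(right - left) < 0x8000;
}

int main(void)
{
	assert(seqno_less(10, 11));      /* ordinary case */
	assert(seqno_less(0xfffe, 2));   /* still "less" across the wrap */
	assert(!seqno_less(2, 0xfffe));
	return 0;
}

Given such a comparison, __tipc_skb_queue_sorted() can take its two fast paths (insert at head or tail) before falling back to the linear walk, and a buffer whose sequence number is already queued is freed as a duplicate.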
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index e6018b7eb197..c07612bab95c 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -102,7 +102,7 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb) if (!oskb) break; msg_set_destnode(buf_msg(oskb), dnode); - tipc_node_xmit_skb(net, oskb, dnode, dnode); + tipc_node_xmit_skb(net, oskb, dnode, 0); } rcu_read_unlock(); @@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) &tn->nametbl->publ_list[TIPC_ZONE_SCOPE]); rcu_read_unlock(); - tipc_node_xmit(net, &head, dnode, dnode); + tipc_node_xmit(net, &head, dnode, 0); } static void tipc_publ_subscribe(struct net *net, struct publication *publ, diff --git a/net/tipc/net.c b/net/tipc/net.c index d6d1399ae229..77bf9113c7a7 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -112,14 +112,11 @@ int tipc_net_start(struct net *net, u32 addr) { struct tipc_net *tn = net_generic(net, tipc_net_id); char addr_string[16]; - int res; tn->own_addr = addr; tipc_named_reinit(net); tipc_sk_reinit(net); - res = tipc_bclink_init(net); - if (res) - return res; + tipc_bcast_reinit(net); tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr, TIPC_ZONE_SCOPE, 0, tn->own_addr); @@ -142,7 +139,6 @@ void tipc_net_stop(struct net *net) tn->own_addr); rtnl_lock(); tipc_bearer_stop(net); - tipc_bclink_stop(net); tipc_node_stop(net); rtnl_unlock(); diff --git a/net/tipc/node.c b/net/tipc/node.c index 2c32a83037a3..20cddec0a43c 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -72,7 +72,6 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); -static void node_established_contact(struct tipc_node *n_ptr); static void tipc_node_delete(struct tipc_node *node); static void tipc_node_timeout(unsigned long data); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); @@ -165,8 +164,10 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) INIT_LIST_HEAD(&n_ptr->list); INIT_LIST_HEAD(&n_ptr->publ_list); INIT_LIST_HEAD(&n_ptr->conn_sks); - skb_queue_head_init(&n_ptr->bclink.namedq); - __skb_queue_head_init(&n_ptr->bclink.deferdq); + skb_queue_head_init(&n_ptr->bc_entry.namedq); + skb_queue_head_init(&n_ptr->bc_entry.inputq1); + __skb_queue_head_init(&n_ptr->bc_entry.arrvq); + skb_queue_head_init(&n_ptr->bc_entry.inputq2); hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { if (n_ptr->addr < temp_node->addr) @@ -177,6 +178,18 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) n_ptr->signature = INVALID_NODE_SIG; n_ptr->active_links[0] = INVALID_BEARER_ID; n_ptr->active_links[1] = INVALID_BEARER_ID; + if (!tipc_link_bc_create(net, tipc_own_addr(net), n_ptr->addr, + U16_MAX, tipc_bc_sndlink(net)->window, + n_ptr->capabilities, + &n_ptr->bc_entry.inputq1, + &n_ptr->bc_entry.namedq, + tipc_bc_sndlink(net), + &n_ptr->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n_ptr); + n_ptr = NULL; + goto exit; + } tipc_node_get(n_ptr); setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr); n_ptr->keepalive_intv = U32_MAX; @@ -203,6 +216,7 @@ static void tipc_node_delete(struct tipc_node *node) { list_del_rcu(&node->list); hlist_del_rcu(&node->hash); + kfree(node->bc_entry.link); kfree_rcu(node, 
rcu); } @@ -317,7 +331,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, struct tipc_link *ol = node_active_link(n, 0); struct tipc_link *nl = n->links[bearer_id].link; - if (!nl || !tipc_link_is_up(nl)) + if (!nl) + return; + + tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(nl)) return; n->working_links++; @@ -328,6 +346,7 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE; tipc_bearer_add_dest(n->net, bearer_id, n->addr); + tipc_bcast_inc_bearer_dst_cnt(n->net, bearer_id); pr_debug("Established link <%s> on network plane %c\n", nl->name, nl->net_plane); @@ -336,8 +355,9 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, if (!ol) { *slot0 = bearer_id; *slot1 = bearer_id; - tipc_link_build_bcast_sync_msg(nl, xmitq); - node_established_contact(n); + tipc_node_fsm_evt(n, SELF_ESTABL_CONTACT_EVT); + n->action_flags |= TIPC_NOTIFY_NODE_UP; + tipc_bcast_add_peer(n->net, nl, xmitq); return; } @@ -346,8 +366,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id, pr_debug("Old link <%s> becomes standby\n", ol->name); *slot0 = bearer_id; *slot1 = bearer_id; + tipc_link_set_active(nl, true); + tipc_link_set_active(ol, false); } else if (nl->priority == ol->priority) { - *slot0 = bearer_id; + tipc_link_set_active(nl, true); + *slot1 = bearer_id; } else { pr_debug("New link <%s> is standby\n", nl->name); } @@ -416,10 +439,18 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, } if (!tipc_node_is_up(n)) { + if (tipc_link_peer_is_down(l)) + tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); + tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); + tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_reset(l); + tipc_link_build_reset_msg(l, xmitq); + *maddr = &n->links[*bearer_id].maddr; node_lost_contact(n, &le->inputq); + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); return; } + tipc_bcast_dec_bearer_dst_cnt(n->net, *bearer_id); /* There is still a working link => initiate failover */ tnl = node_active_link(n, 0); @@ -428,6 +459,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1); tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq); tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT); *maddr = &n->links[tnl->bearer_id].maddr; @@ -437,20 +469,28 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_link *l = le->link; struct tipc_media_addr *maddr; struct sk_buff_head xmitq; + if (!l) + return; + __skb_queue_head_init(&xmitq); tipc_node_lock(n); - __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); - if (delete && le->link) { - kfree(le->link); - le->link = NULL; - n->link_cnt--; + if (!tipc_link_is_establishing(l)) { + __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr); + if (delete) { + kfree(l); + le->link = NULL; + n->link_cnt--; + } + } else { + /* Defuse pending tipc_node_link_up() */ + tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_unlock(n); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } @@ -474,6 +514,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool link_up = false; bool accept_addr = false; bool reset = true; + char *if_name; *dupl_addr = 
false; *respond = false; @@ -560,13 +601,20 @@ void tipc_node_check_dest(struct net *net, u32 onode, pr_warn("Cannot establish 3rd link to %x\n", n->addr); goto exit; } - if (!tipc_link_create(n, b, mod(tipc_net(net)->random), - tipc_own_addr(net), onode, &le->maddr, - &le->inputq, &n->bclink.namedq, &l)) { + if_name = strchr(b->name, ':') + 1; + if (!tipc_link_create(net, if_name, b->identity, b->tolerance, + b->net_plane, b->mtu, b->priority, + b->window, mod(tipc_net(net)->random), + tipc_own_addr(net), onode, + n->capabilities, + tipc_bc_sndlink(n->net), n->bc_entry.link, + &le->inputq, + &n->bc_entry.namedq, &l)) { *respond = false; goto exit; } tipc_link_reset(l); + tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); le->link = l; @@ -579,7 +627,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_unlock(n); - if (reset) + if (reset && !tipc_link_is_reset(l)) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } @@ -686,10 +734,10 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) break; case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: - break; case NODE_SYNCH_END_EVT: - case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_BEGIN_EVT: + break; + case NODE_SYNCH_BEGIN_EVT: case NODE_FAILOVER_END_EVT: default: goto illegal_evt; @@ -804,61 +852,36 @@ bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr) return true; } -static void node_established_contact(struct tipc_node *n_ptr) -{ - tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT); - n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP; - n_ptr->bclink.oos_state = 0; - n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net); - tipc_bclink_add_node(n_ptr->net, n_ptr->addr); -} - -static void node_lost_contact(struct tipc_node *n_ptr, +static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq) { char addr_string[16]; struct tipc_sock_conn *conn, *safe; struct tipc_link *l; - struct list_head *conns = &n_ptr->conn_sks; + struct list_head *conns = &n->conn_sks; struct sk_buff *skb; - struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id); uint i; pr_debug("Lost contact with %s\n", - tipc_addr_string_fill(addr_string, n_ptr->addr)); - - /* Flush broadcast link info associated with lost node */ - if (n_ptr->bclink.recv_permitted) { - __skb_queue_purge(&n_ptr->bclink.deferdq); + tipc_addr_string_fill(addr_string, n->addr)); - if (n_ptr->bclink.reasm_buf) { - kfree_skb(n_ptr->bclink.reasm_buf); - n_ptr->bclink.reasm_buf = NULL; - } - - tipc_bclink_remove_node(n_ptr->net, n_ptr->addr); - tipc_bclink_acknowledge(n_ptr, INVALID_LINK_SEQ); - - n_ptr->bclink.recv_permitted = false; - } + /* Clean up broadcast state */ + tipc_bcast_remove_peer(n->net, n->bc_entry.link); /* Abort any ongoing link failover */ for (i = 0; i < MAX_BEARERS; i++) { - l = n_ptr->links[i].link; + l = n->links[i].link; if (l) tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT); } - /* Prevent re-contact with node until cleanup is done */ - tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT); - /* Notify publications from this node */ - n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN; + n->action_flags |= TIPC_NOTIFY_NODE_DOWN; /* Notify sockets connected to node */ list_for_each_entry_safe(conn, safe, conns, list) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, - SHORT_H_SIZE, 0, tn->own_addr, + SHORT_H_SIZE, 0, tipc_own_addr(n->net), conn->peer_node, conn->port, conn->peer_port, 
TIPC_ERR_NO_NODE); if (likely(skb)) @@ -920,18 +943,13 @@ void tipc_node_unlock(struct tipc_node *node) publ_list = &node->publ_list; node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | - TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP | - TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT | - TIPC_BCAST_RESET); + TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP); spin_unlock_bh(&node->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) tipc_publ_notify(net, publ_list, addr); - if (flags & TIPC_WAKEUP_BCAST_USERS) - tipc_bclink_wakeup_users(net); - if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, addr); @@ -943,11 +961,6 @@ void tipc_node_unlock(struct tipc_node *node) tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); - if (flags & TIPC_BCAST_MSG_EVT) - tipc_bclink_input(net); - - if (flags & TIPC_BCAST_RESET) - tipc_node_reset_links(node); } /* Caller should hold node lock for the passed node */ @@ -1063,6 +1076,67 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, } /** + * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node + * @net: the applicable net namespace + * @skb: TIPC packet + * @bearer_id: id of bearer message arrived on + * + * Invoked with no locks held. + */ +static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id) +{ + int rc; + struct sk_buff_head xmitq; + struct tipc_bclink_entry *be; + struct tipc_link_entry *le; + struct tipc_msg *hdr = buf_msg(skb); + int usr = msg_user(hdr); + u32 dnode = msg_destnode(hdr); + struct tipc_node *n; + + __skb_queue_head_init(&xmitq); + + /* If NACK for other node, let rcv link for that node peek into it */ + if ((usr == BCAST_PROTOCOL) && (dnode != tipc_own_addr(net))) + n = tipc_node_find(net, dnode); + else + n = tipc_node_find(net, msg_prevnode(hdr)); + if (!n) { + kfree_skb(skb); + return; + } + be = &n->bc_entry; + le = &n->links[bearer_id]; + + rc = tipc_bcast_rcv(net, be->link, skb); + + /* Broadcast link reset may happen at reassembly failure */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); + + /* Broadcast ACKs are sent on a unicast link */ + if (rc & TIPC_LINK_SND_BC_ACK) { + tipc_node_lock(n); + tipc_link_build_ack_msg(le->link, &xmitq); + tipc_node_unlock(n); + } + + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + + /* Deliver. 
'arrvq' is under inputq2's lock protection */ + if (!skb_queue_empty(&be->inputq1)) { + spin_lock_bh(&be->inputq2.lock); + spin_lock_bh(&be->inputq1.lock); + skb_queue_splice_tail_init(&be->inputq1, &be->arrvq); + spin_unlock_bh(&be->inputq1.lock); + spin_unlock_bh(&be->inputq2.lock); + tipc_sk_mcast_rcv(net, &be->arrvq, &be->inputq2); + } + tipc_node_put(n); +} + +/** * tipc_node_check_state - check and if necessary update node state * @skb: TIPC packet * @bearer_id: identity of bearer delivering the packet @@ -1204,6 +1278,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int usr = msg_user(hdr); int bearer_id = b->identity; struct tipc_link_entry *le; + u16 bc_ack = msg_bcast_ack(hdr); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1212,13 +1287,12 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(!tipc_msg_validate(skb))) goto discard; - /* Handle arrival of a non-unicast link packet */ + /* Handle arrival of discovery or broadcast packet */ if (unlikely(msg_non_seq(hdr))) { - if (usr == LINK_CONFIG) - tipc_disc_rcv(net, skb, b); + if (unlikely(usr == LINK_CONFIG)) + return tipc_disc_rcv(net, skb, b); else - tipc_bclink_rcv(net, skb); - return; + return tipc_node_bc_rcv(net, skb, bearer_id); } /* Locate neighboring node that sent packet */ @@ -1227,19 +1301,18 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) goto discard; le = &n->links[bearer_id]; + /* Ensure broadcast reception is in synch with peer's send state */ + if (unlikely(usr == LINK_PROTOCOL)) + tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + else if (unlikely(n->bc_entry.link->acked != bc_ack)) + tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_node_lock(n); /* Is reception permitted at the moment ? 
*/ if (!tipc_node_filter_pkt(n, hdr)) goto unlock; - if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) - tipc_bclink_sync_state(n, hdr); - - /* Release acked broadcast packets */ - if (unlikely(n->bclink.acked != msg_bcast_ack(hdr))) - tipc_bclink_acknowledge(n, msg_bcast_ack(hdr)); - /* Check and if necessary update node state */ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) { rc = tipc_link_rcv(le->link, skb, &xmitq); @@ -1254,8 +1327,8 @@ unlock: if (unlikely(rc & TIPC_LINK_DOWN_EVT)) tipc_node_link_down(n, bearer_id, false); - if (unlikely(!skb_queue_empty(&n->bclink.namedq))) - tipc_named_rcv(net, &n->bclink.namedq); + if (unlikely(!skb_queue_empty(&n->bc_entry.namedq))) + tipc_named_rcv(net, &n->bc_entry.namedq); if (!skb_queue_empty(&le->inputq)) tipc_sk_rcv(net, &le->inputq); diff --git a/net/tipc/node.h b/net/tipc/node.h index 344b3e7594fd..6734562d3c6e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -55,36 +55,18 @@ enum { TIPC_NOTIFY_NODE_DOWN = (1 << 3), TIPC_NOTIFY_NODE_UP = (1 << 4), - TIPC_WAKEUP_BCAST_USERS = (1 << 5), TIPC_NOTIFY_LINK_UP = (1 << 6), - TIPC_NOTIFY_LINK_DOWN = (1 << 7), - TIPC_BCAST_MSG_EVT = (1 << 9), - TIPC_BCAST_RESET = (1 << 10) + TIPC_NOTIFY_LINK_DOWN = (1 << 7) }; -/** - * struct tipc_node_bclink - TIPC node bclink structure - * @acked: sequence # of last outbound b'cast message acknowledged by node - * @last_in: sequence # of last in-sequence b'cast message received from node - * @last_sent: sequence # of last b'cast message sent by node - * @oos_state: state tracker for handling OOS b'cast messages - * @deferred_queue: deferred queue saved OOS b'cast message received from node - * @reasm_buf: broadcast reassembly queue head from node - * @inputq_map: bitmap indicating which inqueues should be kicked - * @recv_permitted: true if node is allowed to receive b'cast messages +/* Optional capabilities supported by this code version */ -struct tipc_node_bclink { - u32 acked; - u32 last_in; - u32 last_sent; - u32 oos_state; - u32 deferred_size; - struct sk_buff_head deferdq; - struct sk_buff *reasm_buf; - struct sk_buff_head namedq; - bool recv_permitted; +enum { + TIPC_BCAST_SYNCH = (1 << 1) }; +#define TIPC_NODE_CAPABILITIES TIPC_BCAST_SYNCH + struct tipc_link_entry { struct tipc_link *link; u32 mtu; @@ -92,6 +74,14 @@ struct tipc_link_entry { struct tipc_media_addr maddr; }; +struct tipc_bclink_entry { + struct tipc_link *link; + struct sk_buff_head inputq1; + struct sk_buff_head arrvq; + struct sk_buff_head inputq2; + struct sk_buff_head namedq; +}; + /** * struct tipc_node - TIPC node structure * @addr: network address of node @@ -104,7 +94,6 @@ struct tipc_link_entry { * @active_links: bearer ids of active links, used as index into links[] array * @links: array containing references to all links to node * @action_flags: bit mask of different types of node actions - * @bclink: broadcast-related info * @state: connectivity state vs peer node * @sync_point: sequence number where synch/failover is finished * @list: links to adjacent nodes in sorted list of cluster's nodes @@ -124,8 +113,8 @@ struct tipc_node { struct hlist_node hash; int active_links[2]; struct tipc_link_entry links[MAX_BEARERS]; + struct tipc_bclink_entry bc_entry; int action_flags; - struct tipc_node_bclink bclink; struct list_head list; int state; u16 sync_point; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1060d52ff23e..552dbaba9cf3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -689,13 +689,13 @@ static int tipc_sendmcast(struct socket *sock, 
struct tipc_name_seq *seq, msg_set_hdr_sz(mhdr, MCAST_H_SIZE); new_mtu: - mtu = tipc_bclink_get_mtu(); + mtu = tipc_bcast_get_mtu(net); rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bclink_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, pktchain); if (likely(!rc)) return dsz; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6e648d90297a..ad2719ad4c1b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,6 +48,7 @@ #include <linux/tipc_netlink.h> #include "core.h" #include "bearer.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -155,14 +156,12 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct udp_bearer *ub; struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; - struct sk_buff *clone; struct rtable *rt; if (skb_headroom(skb) < UDP_MIN_HEADROOM) pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); - clone = skb_clone(skb, GFP_ATOMIC); - skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); ub = rcu_dereference_rtnl(b->media_ptr); if (!ub) { err = -ENODEV; @@ -172,7 +171,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct flowi4 fl = { .daddr = dst->ipv4.s_addr, .saddr = src->ipv4.s_addr, - .flowi4_mark = clone->mark, + .flowi4_mark = skb->mark, .flowi4_proto = IPPROTO_UDP }; rt = ip_route_output_key(net, &fl); @@ -181,7 +180,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, dst->udp_port, @@ -204,7 +203,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, &src->ipv6, &dst->ipv6, 0, ttl, src->udp_port, dst->udp_port, false); @@ -213,7 +212,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, return err; tx_error: - kfree_skb(clone); + kfree_skb(skb); return err; } @@ -222,6 +221,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + int usr = msg_user(buf_msg(skb)); + + if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) + skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { @@ -430,7 +433,6 @@ static void tipc_udp_disable(struct tipc_bearer *b) } if (ub->ubsock) sock_set_flag(ub->ubsock->sk, SOCK_DEAD); - RCU_INIT_POINTER(b->media_ptr, NULL); RCU_INIT_POINTER(ub->bearer, NULL); /* sock_release need to be done outside of rtnl lock */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 94f658235fb4..aaa0b58d6aba 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,9 +326,10 @@ found: return s; } -static inline int unix_writable(struct sock *sk) +static int unix_writable(const struct sock *sk) { - return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + return sk->sk_state != TCP_LISTEN && + (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; } static void unix_write_space(struct sock *sk) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 00e8a349aabc..7fd1220fbfa0 100644 --- 
a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -36,19 +36,20 @@ * not support simultaneous connects (two "client" sockets connecting). * * - "Server" sockets are referred to as listener sockets throughout this - * implementation because they are in the SS_LISTEN state. When a connection - * request is received (the second kind of socket mentioned above), we create a - * new socket and refer to it as a pending socket. These pending sockets are - * placed on the pending connection list of the listener socket. When future - * packets are received for the address the listener socket is bound to, we - * check if the source of the packet is from one that has an existing pending - * connection. If it does, we process the packet for the pending socket. When - * that socket reaches the connected state, it is removed from the listener - * socket's pending list and enqueued in the listener socket's accept queue. - * Callers of accept(2) will accept connected sockets from the listener socket's - * accept queue. If the socket cannot be accepted for some reason then it is - * marked rejected. Once the connection is accepted, it is owned by the user - * process and the responsibility for cleanup falls with that user process. + * implementation because they are in the VSOCK_SS_LISTEN state. When a + * connection request is received (the second kind of socket mentioned above), + * we create a new socket and refer to it as a pending socket. These pending + * sockets are placed on the pending connection list of the listener socket. + * When future packets are received for the address the listener socket is + * bound to, we check if the source of the packet is from one that has an + * existing pending connection. If it does, we process the packet for the + * pending socket. When that socket reaches the connected state, it is removed + * from the listener socket's pending list and enqueued in the listener + * socket's accept queue. Callers of accept(2) will accept connected sockets + * from the listener socket's accept queue. If the socket cannot be accepted + * for some reason then it is marked rejected. Once the connection is + * accepted, it is owned by the user process and the responsibility for cleanup + * falls with that user process. * * - It is possible that these pending sockets will never reach the connected * state; in fact, we may never receive another packet after the connection @@ -114,8 +115,6 @@ static struct proto vsock_proto = { */ #define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) -#define SS_LISTEN 255 - static const struct vsock_transport *transport; static DEFINE_MUTEX(vsock_register_mutex); @@ -887,7 +886,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, /* Listening sockets that have connections in their accept * queue can be read. 
*/ - if (sk->sk_state == SS_LISTEN + if (sk->sk_state == VSOCK_SS_LISTEN && !vsock_is_accept_queue_empty(sk)) mask |= POLLIN | POLLRDNORM; @@ -1144,7 +1143,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = -EALREADY; break; default: - if ((sk->sk_state == SS_LISTEN) || + if ((sk->sk_state == VSOCK_SS_LISTEN) || vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { err = -EINVAL; goto out; @@ -1256,7 +1255,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) goto out; } - if (listener->sk_state != SS_LISTEN) { + if (listener->sk_state != VSOCK_SS_LISTEN) { err = -EINVAL; goto out; } @@ -1348,7 +1347,7 @@ static int vsock_listen(struct socket *sock, int backlog) } sk->sk_max_ack_backlog = backlog; - sk->sk_state = SS_LISTEN; + sk->sk_state = VSOCK_SS_LISTEN; err = 0; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 7555cad83a75..0a369bb440e7 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -92,8 +92,6 @@ static int PROTOCOL_OVERRIDE = -1; */ #define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ) -#define SS_LISTEN 255 - /* Helper function to convert from a VMCI error code to a VSock error code. */ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error) @@ -893,7 +891,7 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; switch (sk->sk_state) { - case SS_LISTEN: + case VSOCK_SS_LISTEN: vmci_transport_recv_listen(sk, pkt); break; case SS_CONNECTING: @@ -1236,7 +1234,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, /* Callers of accept() will be be waiting on the listening socket, not * the pending socket. */ - listener->sk_state_change(listener); + listener->sk_data_ready(listener); return 0; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 4f5543dd2524..da72ed32f143 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -174,6 +174,16 @@ config CFG80211_INTERNAL_REGDB Most distributions have a CRDA package. So if unsure, say N. +config CFG80211_CRDA_SUPPORT + bool "support CRDA" if CFG80211_INTERNAL_REGDB + default y + depends on CFG80211 + help + You should enable this option unless you know for sure you have no + need for it, for example when using internal regdb (above.) + + If unsure, say Y. 
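The new CFG80211_CRDA_SUPPORT option gates the CRDA userspace-helper path at build time; later hunks in this diff wrap nl80211_set_reg(), the NL80211_CMD_SET_REG op and the CRDA timeout machinery in #ifdef CONFIG_CFG80211_CRDA_SUPPORT. A minimal sketch of the usual shape of such a gate (the stub bodies below are our illustration, not code from the patch):

#ifdef CONFIG_CFG80211_CRDA_SUPPORT
int call_crda(const char *alpha2);	/* real helper, calls out to CRDA */
void cancel_crda_timeout(void);
#else
static inline int call_crda(const char *alpha2)
{
	return -ENODATA;		/* no userspace helper configured */
}
static inline void cancel_crda_timeout(void) {}
#endif

Callers can then invoke call_crda() unconditionally; with the option off, the inline stubs compile away and regulatory lookups fall back to the internal regdb or the static world domain.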
+ config CFG80211_WEXT bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT depends on CFG80211 diff --git a/net/wireless/core.c b/net/wireless/core.c index 3893409dee95..b0915515640e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -419,6 +419,7 @@ use_default_name: device_initialize(&rdev->wiphy.dev); rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + device_enable_async_suspend(&rdev->wiphy.dev); INIT_LIST_HEAD(&rdev->destroy_list); spin_lock_init(&rdev->destroy_list_lock); @@ -460,6 +461,9 @@ use_default_name: rdev->wiphy.max_num_csa_counters = 1; + rdev->wiphy.max_sched_scan_plans = 1; + rdev->wiphy.max_sched_scan_plan_interval = U32_MAX; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new_nm); @@ -635,7 +639,7 @@ int wiphy_register(struct wiphy *wiphy) if (WARN_ON(!sband->n_channels)) return -EINVAL; /* - * on 60gHz band, there are no legacy rates, so + * on 60GHz band, there are no legacy rates, so * n_bitrates is 0 */ if (WARN_ON(band != IEEE80211_BAND_60GHZ && diff --git a/net/wireless/core.h b/net/wireless/core.h index b9d5bc8c148d..a618b4b86fa4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -137,6 +137,7 @@ struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; struct rb_node rbn; + u64 ts_boottime; unsigned long ts; unsigned long refcount; atomic_t hold; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5d8748b4c8a2..c71e274c810a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3,6 +3,7 @@ * * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2015 Intel Deutschland GmbH */ #include <linux/if.h> @@ -478,6 +479,12 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = { [NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 }, }; +static const struct nla_policy +nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = { + [NL80211_SCHED_SCAN_PLAN_INTERVAL] = { .type = NLA_U32 }, + [NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -1303,7 +1310,13 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, - rdev->wiphy.max_match_sets)) + rdev->wiphy.max_match_sets) || + nla_put_u32(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS, + rdev->wiphy.max_sched_scan_plans) || + nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL, + rdev->wiphy.max_sched_scan_plan_interval) || + nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, + rdev->wiphy.max_sched_scan_plan_iterations)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && @@ -2403,6 +2416,16 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag } } + if (rdev->ops->get_tx_power) { + int dbm, ret; + + ret = rdev_get_tx_power(rdev, wdev, &dbm); + if (ret == 0 && + nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + DBM_TO_MBM(dbm))) + goto nla_put_failure; + } + if (wdev->ssid_len) { if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) goto nla_put_failure; @@ -3409,12 +3432,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { - params.acl = 
parse_acl_data(&rdev->wiphy, info);
- if (IS_ERR(params.acl))
- return PTR_ERR(params.acl);
- }
-
 if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
 params.smps_mode =
 nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
@@ -3438,6 +3455,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 params.smps_mode = NL80211_SMPS_OFF;
 }
+ if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
+ params.acl = parse_acl_data(&rdev->wiphy, info);
+ if (IS_ERR(params.acl))
+ return PTR_ERR(params.acl);
+ }
+
 wdev_lock(wdev);
 err = rdev_start_ap(rdev, dev, &params);
 if (!err) {
@@ -3945,10 +3968,13 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 struct station_parameters *params,
 enum cfg80211_station_type statype)
 {
- if (params->listen_interval != -1)
+ if (params->listen_interval != -1 &&
+ statype != CFG80211_STA_AP_CLIENT_UNASSOC)
 return -EINVAL;
+
 if (params->aid &&
- !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
+ !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
+ statype != CFG80211_STA_AP_CLIENT_UNASSOC)
 return -EINVAL;
 /* When you run into this, adjust the code below for the new flag */
@@ -3998,7 +4024,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
 }
- if (statype != CFG80211_STA_TDLS_PEER_SETUP) {
+ if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
+ statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
 /* reject other things that can't change */
 if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
 return -EINVAL;
@@ -4010,7 +4037,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 return -EINVAL;
 }
- if (statype != CFG80211_STA_AP_CLIENT) {
+ if (statype != CFG80211_STA_AP_CLIENT &&
+ statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
 if (params->vlan)
 return -EINVAL;
 }
@@ -4022,6 +4050,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
 return -EOPNOTSUPP;
 break;
 case CFG80211_STA_AP_CLIENT:
+ case CFG80211_STA_AP_CLIENT_UNASSOC:
 /* accept only the listed bits */
 if (params->sta_flags_mask &
 ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
@@ -4219,13 +4248,22 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 memset(&params, 0, sizeof(params));
- params.listen_interval = -1;
-
 if (!rdev->ops->change_station)
 return -EOPNOTSUPP;
- if (info->attrs[NL80211_ATTR_STA_AID])
- return -EINVAL;
+ /*
+ * AID and listen_interval properties can be set only for unassociated
+ * station. Include these parameters here and will check them in
+ * cfg80211_check_station_change(). 
+ */
+ if (info->attrs[NL80211_ATTR_PEER_AID])
+ params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
+
+ if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
+ params.listen_interval =
+ nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
+ else
+ params.listen_interval = -1;
 if (!info->attrs[NL80211_ATTR_MAC])
 return -EINVAL;
@@ -4252,9 +4290,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
 }
- if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
- return -EINVAL;
-
 if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 return -EINVAL;
@@ -4918,56 +4953,6 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 return err;
 }
-static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
- [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 },
- [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 },
- [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 },
- [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
- [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
- [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
- [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
-};
-
-static int parse_reg_rule(struct nlattr *tb[],
- struct ieee80211_reg_rule *reg_rule)
-{
- struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
- struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;
-
- if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
- return -EINVAL;
- if (!tb[NL80211_ATTR_FREQ_RANGE_START])
- return -EINVAL;
- if (!tb[NL80211_ATTR_FREQ_RANGE_END])
- return -EINVAL;
- if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
- return -EINVAL;
- if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
- return -EINVAL;
-
- reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);
-
- freq_range->start_freq_khz =
- nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
- freq_range->end_freq_khz =
- nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
- freq_range->max_bandwidth_khz =
- nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);
-
- power_rule->max_eirp =
- nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);
-
- if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
- power_rule->max_antenna_gain =
- nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);
-
- if (tb[NL80211_ATTR_DFS_CAC_TIME])
- reg_rule->dfs_cac_ms =
- nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);
-
- return 0;
-}
-
 static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
 char *data = NULL;
@@ -5599,6 +5584,57 @@ out_err:
 return err;
 }
+#ifdef CONFIG_CFG80211_CRDA_SUPPORT
+static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
+ [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 },
+ [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 },
+ [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 },
+ [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
+ [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
+ [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
+ [NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
+};
+
+static int parse_reg_rule(struct nlattr *tb[],
+ struct ieee80211_reg_rule *reg_rule)
+{
+ struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
+ struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;
+
+ if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
+ return -EINVAL;
+ if (!tb[NL80211_ATTR_FREQ_RANGE_START])
+ return -EINVAL;
+ if (!tb[NL80211_ATTR_FREQ_RANGE_END])
+ return -EINVAL;
+ if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
+ return 
-EINVAL; + if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) + return -EINVAL; + + reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]); + + freq_range->start_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]); + freq_range->end_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]); + freq_range->max_bandwidth_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); + + power_rule->max_eirp = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]); + + if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]) + power_rule->max_antenna_gain = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]); + + if (tb[NL80211_ATTR_DFS_CAC_TIME]) + reg_rule->dfs_cac_ms = + nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]); + + return 0; +} + static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -5675,6 +5711,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) kfree(rd); return r; } +#endif /* CONFIG_CFG80211_CRDA_SUPPORT */ static int validate_scan_freqs(struct nlattr *freqs) { @@ -5960,14 +5997,100 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) return err; } +static int +nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans, + struct cfg80211_sched_scan_request *request, + struct nlattr **attrs) +{ + int tmp, err, i = 0; + struct nlattr *attr; + + if (!attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) { + u32 interval; + + /* + * If scan plans are not specified, + * %NL80211_ATTR_SCHED_SCAN_INTERVAL must be specified. In this + * case one scan plan will be set with the specified scan + * interval and infinite number of iterations. + */ + if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) + return -EINVAL; + + interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]); + if (!interval) + return -EINVAL; + + request->scan_plans[0].interval = + DIV_ROUND_UP(interval, MSEC_PER_SEC); + if (!request->scan_plans[0].interval) + return -EINVAL; + + if (request->scan_plans[0].interval > + wiphy->max_sched_scan_plan_interval) + request->scan_plans[0].interval = + wiphy->max_sched_scan_plan_interval; + + return 0; + } + + nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) { + struct nlattr *plan[NL80211_SCHED_SCAN_PLAN_MAX + 1]; + + if (WARN_ON(i >= n_plans)) + return -EINVAL; + + err = nla_parse(plan, NL80211_SCHED_SCAN_PLAN_MAX, + nla_data(attr), nla_len(attr), + nl80211_plan_policy); + if (err) + return err; + + if (!plan[NL80211_SCHED_SCAN_PLAN_INTERVAL]) + return -EINVAL; + + request->scan_plans[i].interval = + nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_INTERVAL]); + if (!request->scan_plans[i].interval || + request->scan_plans[i].interval > + wiphy->max_sched_scan_plan_interval) + return -EINVAL; + + if (plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]) { + request->scan_plans[i].iterations = + nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]); + if (!request->scan_plans[i].iterations || + (request->scan_plans[i].iterations > + wiphy->max_sched_scan_plan_iterations)) + return -EINVAL; + } else if (i < n_plans - 1) { + /* + * All scan plans but the last one must specify + * a finite number of iterations + */ + return -EINVAL; + } + + i++; + } + + /* + * The last scan plan must not specify the number of + * iterations, it is supposed to run infinitely + */ + if (request->scan_plans[n_plans - 1].iterations) + return -EINVAL; + + return 0; +} + static struct cfg80211_sched_scan_request * nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, struct nlattr 
**attrs) { struct cfg80211_sched_scan_request *request; struct nlattr *attr; - int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i; - u32 interval; + int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0; enum ieee80211_band band; size_t ie_len; struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1]; @@ -5976,13 +6099,6 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (!is_valid_ie_attr(attrs[NL80211_ATTR_IE])) return ERR_PTR(-EINVAL); - if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) - return ERR_PTR(-EINVAL); - - interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]); - if (interval == 0) - return ERR_PTR(-EINVAL); - if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( attrs[NL80211_ATTR_SCAN_FREQUENCIES]); @@ -6046,9 +6162,37 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, if (ie_len > wiphy->max_sched_scan_ie_len) return ERR_PTR(-EINVAL); + if (attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) { + /* + * NL80211_ATTR_SCHED_SCAN_INTERVAL must not be specified since + * each scan plan already specifies its own interval + */ + if (attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) + return ERR_PTR(-EINVAL); + + nla_for_each_nested(attr, + attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) + n_plans++; + } else { + /* + * The scan interval attribute is kept for backward + * compatibility. If no scan plans are specified and sched scan + * interval is specified, one scan plan will be set with this + * scan interval and infinite number of iterations. + */ + if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) + return ERR_PTR(-EINVAL); + + n_plans = 1; + } + + if (!n_plans || n_plans > wiphy->max_sched_scan_plans) + return ERR_PTR(-EINVAL); + request = kzalloc(sizeof(*request) + sizeof(*request->ssids) * n_ssids + sizeof(*request->match_sets) * n_match_sets + + sizeof(*request->scan_plans) * n_plans + sizeof(*request->channels) * n_channels + ie_len, GFP_KERNEL); if (!request) @@ -6076,6 +6220,18 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } request->n_match_sets = n_match_sets; + if (n_match_sets) + request->scan_plans = (void *)(request->match_sets + + n_match_sets); + else if (request->ie) + request->scan_plans = (void *)(request->ie + ie_len); + else if (n_ssids) + request->scan_plans = (void *)(request->ssids + n_ssids); + else + request->scan_plans = (void *)(request->channels + n_channels); + + request->n_scan_plans = n_plans; + i = 0; if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { /* user specified, bail out if channel not found */ @@ -6238,7 +6394,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->delay = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); - request->interval = interval; + err = nl80211_parse_sched_scan_plans(wiphy, n_plans, request, attrs); + if (err) + goto out_free; + request->scan_start = jiffies; return request; @@ -6591,6 +6750,11 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, jiffies_to_msecs(jiffies - intbss->ts))) goto nla_put_failure; + if (intbss->ts_boottime && + nla_put_u64(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, + intbss->ts_boottime)) + goto nla_put_failure; + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal)) @@ -8831,7 +8995,7 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, static int nl80211_send_wowlan_nd(struct sk_buff *msg, struct cfg80211_sched_scan_request *req) { - struct nlattr *nd, 
*freqs, *matches, *match; + struct nlattr *nd, *freqs, *matches, *match, *scan_plans, *scan_plan; int i; if (!req) @@ -8841,7 +9005,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!nd) return -ENOBUFS; - if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) + if (req->n_scan_plans == 1 && + nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, + req->scan_plans[0].interval * 1000)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay)) @@ -8868,6 +9034,23 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, nla_nest_end(msg, matches); } + scan_plans = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_PLANS); + if (!scan_plans) + return -ENOBUFS; + + for (i = 0; i < req->n_scan_plans; i++) { + scan_plan = nla_nest_start(msg, i + 1); + if (!scan_plan || + nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL, + req->scan_plans[i].interval) || + (req->scan_plans[i].iterations && + nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS, + req->scan_plans[i].iterations))) + return -ENOBUFS; + nla_nest_end(msg, scan_plan); + } + nla_nest_end(msg, scan_plans); + nla_nest_end(msg, nd); return 0; @@ -9938,6 +10121,9 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) if (!wdev->netdev && !wdev->p2p_started) return -ENETDOWN; } + + if (!vcmd->doit) + return -EOPNOTSUPP; } else { wdev = NULL; } @@ -9957,6 +10143,193 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } +static int nl80211_prepare_vendor_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct cfg80211_registered_device **rdev, + struct wireless_dev **wdev) +{ + u32 vid, subcmd; + unsigned int i; + int vcmd_idx = -1; + int err; + void *data = NULL; + unsigned int data_len = 0; + + rtnl_lock(); + + if (cb->args[0]) { + /* subtract the 1 again here */ + struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); + struct wireless_dev *tmp; + + if (!wiphy) { + err = -ENODEV; + goto out_unlock; + } + *rdev = wiphy_to_rdev(wiphy); + *wdev = NULL; + + if (cb->args[1]) { + list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { + if (tmp->identifier == cb->args[1] - 1) { + *wdev = tmp; + break; + } + } + } + + /* keep rtnl locked in successful case */ + return 0; + } + + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (err) + goto out_unlock; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID] || + !nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { + err = -EINVAL; + goto out_unlock; + } + + *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), + nl80211_fam.attrbuf); + if (IS_ERR(*wdev)) + *wdev = NULL; + + *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), + nl80211_fam.attrbuf); + if (IS_ERR(*rdev)) { + err = PTR_ERR(*rdev); + goto out_unlock; + } + + vid = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_ID]); + subcmd = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); + + for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) { + const struct wiphy_vendor_command *vcmd; + + vcmd = &(*rdev)->wiphy.vendor_commands[i]; + + if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) + continue; + + if (!vcmd->dumpit) { + err = -EOPNOTSUPP; + goto out_unlock; + } + + vcmd_idx = i; + break; + } + + if (vcmd_idx < 0) { + err = -EOPNOTSUPP; + goto out_unlock; + } + + if (nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]) { + data = nla_data(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); + data_len = 
nla_len(nl80211_fam.attrbuf[NL80211_ATTR_VENDOR_DATA]); + } + + /* 0 is the first index - add 1 to parse only once */ + cb->args[0] = (*rdev)->wiphy_idx + 1; + /* add 1 to know if it was NULL */ + cb->args[1] = *wdev ? (*wdev)->identifier + 1 : 0; + cb->args[2] = vcmd_idx; + cb->args[3] = (unsigned long)data; + cb->args[4] = data_len; + + /* keep rtnl locked in successful case */ + return 0; + out_unlock: + rtnl_unlock(); + return err; +} + +static int nl80211_vendor_cmd_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + unsigned int vcmd_idx; + const struct wiphy_vendor_command *vcmd; + void *data; + int data_len; + int err; + struct nlattr *vendor_data; + + err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); + if (err) + return err; + + vcmd_idx = cb->args[2]; + data = (void *)cb->args[3]; + data_len = cb->args[4]; + vcmd = &rdev->wiphy.vendor_commands[vcmd_idx]; + + if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | + WIPHY_VENDOR_CMD_NEED_NETDEV)) { + if (!wdev) + return -EINVAL; + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && + !wdev->netdev) + return -EINVAL; + + if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { + if (wdev->netdev && + !netif_running(wdev->netdev)) + return -ENETDOWN; + if (!wdev->netdev && !wdev->p2p_started) + return -ENETDOWN; + } + } + + while (1) { + void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + NL80211_CMD_VENDOR); + if (!hdr) + break; + + if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev && nla_put_u64(skb, NL80211_ATTR_WDEV, + wdev_id(wdev)))) { + genlmsg_cancel(skb, hdr); + break; + } + + vendor_data = nla_nest_start(skb, NL80211_ATTR_VENDOR_DATA); + if (!vendor_data) { + genlmsg_cancel(skb, hdr); + break; + } + + err = vcmd->dumpit(&rdev->wiphy, wdev, skb, data, data_len, + (unsigned long *)&cb->args[5]); + nla_nest_end(skb, vendor_data); + + if (err == -ENOBUFS || err == -ENOENT) { + genlmsg_cancel(skb, hdr); + break; + } else if (err) { + genlmsg_cancel(skb, hdr); + goto out; + } + + genlmsg_end(skb, hdr); + } + + err = skb->len; + out: + rtnl_unlock(); + return err; +} + struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_commands cmd, enum nl80211_attrs attr, @@ -10533,6 +10906,7 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, +#ifdef CONFIG_CFG80211_CRDA_SUPPORT { .cmd = NL80211_CMD_SET_REG, .doit = nl80211_set_reg, @@ -10540,6 +10914,7 @@ static const struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_RTNL, }, +#endif { .cmd = NL80211_CMD_REQ_SET_REG, .doit = nl80211_req_set_reg, @@ -10994,6 +11369,7 @@ static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_VENDOR, .doit = nl80211_vendor_cmd, + .dumpit = nl80211_vendor_cmd_dump, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_WIPHY | diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2510b231451e..2e8d6f39ed56 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -135,10 +135,7 @@ static spinlock_t reg_indoor_lock; /* Used to track the userspace process controlling the indoor setting */ static u32 reg_is_indoor_portid; -/* Max number of consecutive attempts to communicate with CRDA */ -#define REG_MAX_CRDA_TIMEOUTS 10 - -static u32 reg_crda_timeouts; +static void restore_regulatory_settings(bool reset_user); static 
const struct ieee80211_regdomain *get_cfg80211_regdom(void) { @@ -226,9 +223,6 @@ static DECLARE_DELAYED_WORK(reg_check_chans, reg_check_chans_work); static void reg_todo(struct work_struct *work); static DECLARE_WORK(reg_work, reg_todo); -static void reg_timeout_work(struct work_struct *work); -static DECLARE_DELAYED_WORK(reg_timeout, reg_timeout_work); - /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 8, @@ -262,7 +256,7 @@ static const struct ieee80211_regdomain world_regdom = { REG_RULE(5745-10, 5825+10, 80, 6, 20, NL80211_RRF_NO_IR), - /* IEEE 802.11ad (60gHz), channels 1..3 */ + /* IEEE 802.11ad (60GHz), channels 1..3 */ REG_RULE(56160+2160*1-1080, 56160+2160*3+1080, 2160, 0, 0, 0), } }; @@ -279,6 +273,9 @@ MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); static void reg_free_request(struct regulatory_request *request) { + if (request == &core_request_world) + return; + if (request != get_last_request()) kfree(request); } @@ -453,68 +450,70 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd) } #ifdef CONFIG_CFG80211_INTERNAL_REGDB -struct reg_regdb_search_request { - char alpha2[2]; +struct reg_regdb_apply_request { struct list_head list; + const struct ieee80211_regdomain *regdom; }; -static LIST_HEAD(reg_regdb_search_list); -static DEFINE_MUTEX(reg_regdb_search_mutex); +static LIST_HEAD(reg_regdb_apply_list); +static DEFINE_MUTEX(reg_regdb_apply_mutex); -static void reg_regdb_search(struct work_struct *work) +static void reg_regdb_apply(struct work_struct *work) { - struct reg_regdb_search_request *request; - const struct ieee80211_regdomain *curdom, *regdom = NULL; - int i; + struct reg_regdb_apply_request *request; rtnl_lock(); - mutex_lock(&reg_regdb_search_mutex); - while (!list_empty(&reg_regdb_search_list)) { - request = list_first_entry(&reg_regdb_search_list, - struct reg_regdb_search_request, + mutex_lock(&reg_regdb_apply_mutex); + while (!list_empty(&reg_regdb_apply_list)) { + request = list_first_entry(&reg_regdb_apply_list, + struct reg_regdb_apply_request, list); list_del(&request->list); - for (i = 0; i < reg_regdb_size; i++) { - curdom = reg_regdb[i]; - - if (alpha2_equal(request->alpha2, curdom->alpha2)) { - regdom = reg_copy_regd(curdom); - break; - } - } - + set_regdom(request->regdom, REGD_SOURCE_INTERNAL_DB); kfree(request); } - mutex_unlock(&reg_regdb_search_mutex); - - if (!IS_ERR_OR_NULL(regdom)) - set_regdom(regdom, REGD_SOURCE_INTERNAL_DB); + mutex_unlock(&reg_regdb_apply_mutex); rtnl_unlock(); } -static DECLARE_WORK(reg_regdb_work, reg_regdb_search); +static DECLARE_WORK(reg_regdb_work, reg_regdb_apply); -static void reg_regdb_query(const char *alpha2) +static int reg_query_builtin(const char *alpha2) { - struct reg_regdb_search_request *request; + const struct ieee80211_regdomain *regdom = NULL; + struct reg_regdb_apply_request *request; + unsigned int i; - if (!alpha2) - return; + for (i = 0; i < reg_regdb_size; i++) { + if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) { + regdom = reg_regdb[i]; + break; + } + } + + if (!regdom) + return -ENODATA; - request = kzalloc(sizeof(struct reg_regdb_search_request), GFP_KERNEL); + request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL); if (!request) - return; + return -ENOMEM; - memcpy(request->alpha2, alpha2, 2); + request->regdom = reg_copy_regd(regdom); + if (IS_ERR_OR_NULL(request->regdom)) { + kfree(request); + return -ENOMEM; + } - mutex_lock(&reg_regdb_search_mutex); - 
list_add_tail(&request->list, &reg_regdb_search_list); - mutex_unlock(&reg_regdb_search_mutex); + mutex_lock(&reg_regdb_apply_mutex); + list_add_tail(&request->list, &reg_regdb_apply_list); + mutex_unlock(&reg_regdb_apply_mutex); schedule_work(&reg_regdb_work); + + return 0; } /* Feel free to add any other sanity checks here */ @@ -525,9 +524,45 @@ static void reg_regdb_size_check(void) } #else static inline void reg_regdb_size_check(void) {} -static inline void reg_regdb_query(const char *alpha2) {} +static inline int reg_query_builtin(const char *alpha2) +{ + return -ENODATA; +} #endif /* CONFIG_CFG80211_INTERNAL_REGDB */ +#ifdef CONFIG_CFG80211_CRDA_SUPPORT +/* Max number of consecutive attempts to communicate with CRDA */ +#define REG_MAX_CRDA_TIMEOUTS 10 + +static u32 reg_crda_timeouts; + +static void crda_timeout_work(struct work_struct *work); +static DECLARE_DELAYED_WORK(crda_timeout, crda_timeout_work); + +static void crda_timeout_work(struct work_struct *work) +{ + REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); + rtnl_lock(); + reg_crda_timeouts++; + restore_regulatory_settings(true); + rtnl_unlock(); +} + +static void cancel_crda_timeout(void) +{ + cancel_delayed_work(&crda_timeout); +} + +static void cancel_crda_timeout_sync(void) +{ + cancel_delayed_work_sync(&crda_timeout); +} + +static void reset_crda_timeouts(void) +{ + reg_crda_timeouts = 0; +} + /* * This lets us keep regulatory code which is updated on a regulatory * basis in userspace. @@ -536,13 +571,11 @@ static int call_crda(const char *alpha2) { char country[12]; char *env[] = { country, NULL }; + int ret; snprintf(country, sizeof(country), "COUNTRY=%c%c", alpha2[0], alpha2[1]); - /* query internal regulatory database (if it exists) */ - reg_regdb_query(alpha2); - if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) { pr_debug("Exceeded CRDA call max attempts. Not calling CRDA\n"); return -EINVAL;
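/*
 * Editor's note: the CONFIG_CFG80211_CRDA_SUPPORT block above pairs the
 * CRDA uevent with a watchdog. call_crda() arms crda_timeout after the
 * uevent goes out, a timely reply (set_regdom() from userspace) cancels
 * it via reg_set_request_processed(), and expiry restores the previous
 * regulatory state. A generic sketch of this request-with-deadline
 * idiom, under hypothetical my_*() names:
 */
static void my_timeout_fn(struct work_struct *work)
{
	/* no reply in time: count the failure and roll back */
}

static DECLARE_DELAYED_WORK(my_timeout, my_timeout_fn);

static int my_send_request(void)
{
	int ret = my_fire_uevent();	/* e.g. kobject_uevent_env() */

	if (ret)
		return ret;

	/* arm the watchdog only once the request is actually out */
	schedule_delayed_work(&my_timeout, msecs_to_jiffies(3142));
	return 0;
}

static void my_handle_reply(void)
{
	cancel_delayed_work(&my_timeout);	/* reply beat the deadline */
	/* ... apply the reply ... */
}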
@@ -554,18 +587,34 @@ static int call_crda(const char *alpha2) else pr_debug("Calling CRDA to update world regulatory domain\n"); - return kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env); + ret = kobject_uevent_env(&reg_pdev->dev.kobj, KOBJ_CHANGE, env); + if (ret) + return ret; + + queue_delayed_work(system_power_efficient_wq, + &crda_timeout, msecs_to_jiffies(3142)); + return 0; +} +#else +static inline void cancel_crda_timeout(void) {} +static inline void cancel_crda_timeout_sync(void) {} +static inline void reset_crda_timeouts(void) {} +static inline int call_crda(const char *alpha2) +{ + return -ENODATA; } +#endif /* CONFIG_CFG80211_CRDA_SUPPORT */ -static enum reg_request_treatment -reg_call_crda(struct regulatory_request *request) +static bool reg_query_database(struct regulatory_request *request) { - if (call_crda(request->alpha2)) - return REG_REQ_IGNORE; + /* query internal regulatory database (if it exists) */ + if (reg_query_builtin(request->alpha2) == 0) + return true; - queue_delayed_work(system_power_efficient_wq, - &reg_timeout, msecs_to_jiffies(3142)); - return REG_REQ_OK; + if (call_crda(request->alpha2) == 0) + return true; + + return false; } bool reg_is_valid_request(const char *alpha2) @@ -1040,8 +1089,8 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq, return ERR_PTR(-EINVAL); } -const struct ieee80211_reg_rule *__freq_reg_info(struct wiphy *wiphy, - u32 center_freq, u32 min_bw) +static const struct ieee80211_reg_rule * +__freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw) { const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy); const struct ieee80211_reg_rule *reg_rule = NULL; @@ -1081,11 +1130,11 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator) } EXPORT_SYMBOL(reg_initiator_name); -#ifdef CONFIG_CFG80211_REG_DEBUG static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, struct ieee80211_channel *chan, const struct ieee80211_reg_rule *reg_rule) { +#ifdef CONFIG_CFG80211_REG_DEBUG const struct ieee80211_power_rule *power_rule; const struct ieee80211_freq_range *freq_range; char max_antenna_gain[32], bw[32]; @@ -1096,7 +1145,7 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, if (!power_rule->max_antenna_gain) snprintf(max_antenna_gain, sizeof(max_antenna_gain), "N/A"); else - snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d", + snprintf(max_antenna_gain, sizeof(max_antenna_gain), "%d mBi", power_rule->max_antenna_gain); if (reg_rule->flags & NL80211_RRF_AUTO_BW) @@ -1110,19 +1159,12 @@ static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, REG_DBG_PRINT("Updating information on frequency %d MHz with regulatory rule:\n", chan->center_freq); - REG_DBG_PRINT("%d KHz - %d KHz @ %s), (%s mBi, %d mBm)\n", + REG_DBG_PRINT("(%d KHz - %d KHz @ %s), (%s, %d mBm)\n", freq_range->start_freq_khz, freq_range->end_freq_khz, bw, max_antenna_gain, power_rule->max_eirp); -} -#else -static void chan_reg_rule_print_dbg(const struct ieee80211_regdomain *regd, - struct ieee80211_channel *chan, - const struct ieee80211_reg_rule *reg_rule) -{ - return; -} #endif +} /* * Note that right now we assume the desired channel bandwidth @@ -1311,7 +1353,8 @@ static bool reg_dev_ignore_cell_hint(struct wiphy *wiphy) return !(wiphy->features & NL80211_FEATURE_CELL_BASE_REG_HINTS); } #else -static int reg_ignore_cell_hint(struct regulatory_request *pending_request) +static enum reg_request_treatment +reg_ignore_cell_hint(struct 
regulatory_request *pending_request) { return REG_REQ_IGNORE; } @@ -1846,7 +1889,7 @@ static void reg_set_request_processed(void) need_more_processing = true; spin_unlock(&reg_requests_lock); - cancel_delayed_work(&reg_timeout); + cancel_crda_timeout(); if (need_more_processing) schedule_work(&reg_work); @@ -1858,19 +1901,18 @@ static void reg_set_request_processed(void) * * The wireless subsystem can use this function to process * a regulatory request issued by the regulatory core. - * - * Returns one of the different reg request treatment values. */ static enum reg_request_treatment reg_process_hint_core(struct regulatory_request *core_request) { + if (reg_query_database(core_request)) { + core_request->intersect = false; + core_request->processed = false; + reg_update_last_request(core_request); + return REG_REQ_OK; + } - core_request->intersect = false; - core_request->processed = false; - - reg_update_last_request(core_request); - - return reg_call_crda(core_request); + return REG_REQ_IGNORE; } static enum reg_request_treatment @@ -1915,8 +1957,6 @@ __reg_process_hint_user(struct regulatory_request *user_request) * * The wireless subsystem can use this function to process * a regulatory request initiated by userspace. - * - * Returns one of the different reg request treatment values. */ static enum reg_request_treatment reg_process_hint_user(struct regulatory_request *user_request) @@ -1925,20 +1965,20 @@ reg_process_hint_user(struct regulatory_request *user_request) treatment = __reg_process_hint_user(user_request); if (treatment == REG_REQ_IGNORE || - treatment == REG_REQ_ALREADY_SET) { - reg_free_request(user_request); - return treatment; - } + treatment == REG_REQ_ALREADY_SET) + return REG_REQ_IGNORE; user_request->intersect = treatment == REG_REQ_INTERSECT; user_request->processed = false; - reg_update_last_request(user_request); - - user_alpha2[0] = user_request->alpha2[0]; - user_alpha2[1] = user_request->alpha2[1]; + if (reg_query_database(user_request)) { + reg_update_last_request(user_request); + user_alpha2[0] = user_request->alpha2[0]; + user_alpha2[1] = user_request->alpha2[1]; + return REG_REQ_OK; + } - return reg_call_crda(user_request); + return REG_REQ_IGNORE; } static enum reg_request_treatment @@ -1986,16 +2026,12 @@ reg_process_hint_driver(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - reg_free_request(driver_request); - return treatment; + return REG_REQ_IGNORE; case REG_REQ_INTERSECT: - /* fall through */ case REG_REQ_ALREADY_SET: regd = reg_copy_regd(get_cfg80211_regdom()); - if (IS_ERR(regd)) { - reg_free_request(driver_request); + if (IS_ERR(regd)) return REG_REQ_IGNORE; - } tmp = get_wiphy_regdom(wiphy); rcu_assign_pointer(wiphy->regd, regd); @@ -2006,8 +2042,6 @@ reg_process_hint_driver(struct wiphy *wiphy, driver_request->intersect = treatment == REG_REQ_INTERSECT; driver_request->processed = false; - reg_update_last_request(driver_request); - /* * Since CRDA will not be called in this case as we already * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ if (treatment == REG_REQ_ALREADY_SET) { nl80211_send_reg_change_event(driver_request); + reg_update_last_request(driver_request); reg_set_request_processed(); - return treatment; + return REG_REQ_ALREADY_SET; } - return reg_call_crda(driver_request); + if (reg_query_database(driver_request)) { + reg_update_last_request(driver_request); + return REG_REQ_OK; + } + + return REG_REQ_IGNORE; } static enum reg_request_treatment
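/*
 * Editor's note: with the rework above, every hint path commits its
 * request (reg_update_last_request()) only after reg_query_database()
 * has found a consumer: the builtin table is tried first, then the CRDA
 * uevent, and a miss maps to REG_REQ_IGNORE. The chain needs no #ifdefs
 * at the call sites because each provider compiles to an inline stub
 * returning an error when its Kconfig option is off. A condensed sketch
 * of the pattern (CONFIG_MY_PROVIDER and the my_*() names are
 * illustrative, not part of this patch):
 */
#ifdef CONFIG_MY_PROVIDER
int my_provider_query(const char *key);	/* real implementation */
#else
static inline int my_provider_query(const char *key)
{
	return -ENODATA;	/* compiled out: fall through to the next */
}
#endif

static bool my_query(const char *key)
{
	return my_provider_query(key) == 0 || my_fallback_query(key) == 0;
}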
@@ -2085,12 +2125,11 @@ reg_process_hint_country_ie(struct wiphy *wiphy, case REG_REQ_OK: break; case REG_REQ_IGNORE: - /* fall through */ + return REG_REQ_IGNORE; case REG_REQ_ALREADY_SET: reg_free_request(country_ie_request); - return treatment; + return REG_REQ_ALREADY_SET; case REG_REQ_INTERSECT: - reg_free_request(country_ie_request); /* * This doesn't happen yet, not sure we * ever want to support it for this case. */ @@ -2102,9 +2141,12 @@ reg_process_hint_country_ie(struct wiphy *wiphy, country_ie_request->intersect = false; country_ie_request->processed = false; - reg_update_last_request(country_ie_request); + if (reg_query_database(country_ie_request)) { + reg_update_last_request(country_ie_request); + return REG_REQ_OK; + } - return reg_call_crda(country_ie_request); + return REG_REQ_IGNORE; } /* This processes *all* regulatory hints */ @@ -2118,11 +2160,11 @@ static void reg_process_hint(struct regulatory_request *reg_request) switch (reg_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - reg_process_hint_core(reg_request); - return; + treatment = reg_process_hint_core(reg_request); + break; case NL80211_REGDOM_SET_BY_USER: - reg_process_hint_user(reg_request); - return; + treatment = reg_process_hint_user(reg_request); + break; case NL80211_REGDOM_SET_BY_DRIVER: if (!wiphy) goto out_free; @@ -2138,6 +2180,12 @@ static void reg_process_hint(struct regulatory_request *reg_request) goto out_free; } + if (treatment == REG_REQ_IGNORE) + goto out_free; + + WARN(treatment != REG_REQ_OK && treatment != REG_REQ_ALREADY_SET, + "unexpected treatment value %d\n", treatment); + /* This is required so that the orig_* parameters are saved. * NOTE: treatment must be set for any case that reaches here! */ @@ -2345,7 +2393,7 @@ int regulatory_hint_user(const char *alpha2, request->user_reg_hint_type = user_reg_hint_type; /* Allow calling CRDA again */ - reg_crda_timeouts = 0; + reset_crda_timeouts(); queue_regulatory_request(request); @@ -2417,7 +2465,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->initiator = NL80211_REGDOM_SET_BY_DRIVER; /* Allow calling CRDA again */ - reg_crda_timeouts = 0; + reset_crda_timeouts(); queue_regulatory_request(request); @@ -2473,7 +2521,7 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band, request->country_ie_env = env; /* Allow calling CRDA again */ - reg_crda_timeouts = 0; + reset_crda_timeouts(); queue_regulatory_request(request); request = NULL; @@ -2874,11 +2922,8 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, } request_wiphy = wiphy_idx_to_wiphy(driver_request->wiphy_idx); - if (!request_wiphy) { - queue_delayed_work(system_power_efficient_wq, - &reg_timeout, 0); + if (!request_wiphy) return -ENODEV; - } if (!driver_request->intersect) { if (request_wiphy->regd) @@ -2935,11 +2980,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, } request_wiphy = wiphy_idx_to_wiphy(country_ie_request->wiphy_idx); - if (!request_wiphy) { - queue_delayed_work(system_power_efficient_wq, - &reg_timeout, 0); + if (!request_wiphy) return -ENODEV; - } if (country_ie_request->intersect) return -EINVAL; @@ -2966,7 +3008,7 @@ int set_regdom(const struct ieee80211_regdomain *rd, } if (regd_src == REGD_SOURCE_CRDA) - reg_crda_timeouts = 0; + reset_crda_timeouts(); lr = get_last_request(); @@ -3123,15 +3165,6 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) lr->country_ie_env = ENVIRON_ANY; } -static void reg_timeout_work(struct work_struct *work) -{ - 
REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); - rtnl_lock(); - reg_crda_timeouts++; - restore_regulatory_settings(true); - rtnl_unlock(); -} - /* * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for * UNII band definitions @@ -3217,7 +3250,7 @@ void regulatory_exit(void) struct reg_beacon *reg_beacon, *btmp; cancel_work_sync(®_work); - cancel_delayed_work_sync(®_timeout); + cancel_crda_timeout_sync(); cancel_delayed_work_sync(®_check_chans); /* Lock to suppress warnings */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 3a50aa2553bf..14d5369eb778 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -266,8 +266,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) spin_lock_bh(&rdev->bss_lock); __cfg80211_bss_expire(rdev, request->scan_start); spin_unlock_bh(&rdev->bss_lock); - request->scan_start = - jiffies + msecs_to_jiffies(request->interval); + request->scan_start = jiffies; } nl80211_send_sched_scan_results(rdev, request->dev); } @@ -839,6 +838,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, found->pub.signal = tmp->pub.signal; found->pub.capability = tmp->pub.capability; found->ts = tmp->ts; + found->ts_boottime = tmp->ts_boottime; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; @@ -938,14 +938,13 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, } /* Returned bss is reference counted and must be cleaned up appropriately. */ -struct cfg80211_bss* -cfg80211_inform_bss_width(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - enum cfg80211_bss_frame_type ftype, - const u8 *bssid, u64 tsf, u16 capability, - u16 beacon_interval, const u8 *ie, size_t ielen, - s32 signal, gfp_t gfp) +struct cfg80211_bss * +cfg80211_inform_bss_data(struct wiphy *wiphy, + struct cfg80211_inform_bss *data, + enum cfg80211_bss_frame_type ftype, + const u8 *bssid, u64 tsf, u16 capability, + u16 beacon_interval, const u8 *ie, size_t ielen, + gfp_t gfp) { struct cfg80211_bss_ies *ies; struct ieee80211_channel *channel; @@ -957,19 +956,21 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, return NULL; if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && - (signal < 0 || signal > 100))) + (data->signal < 0 || data->signal > 100))) return NULL; - channel = cfg80211_get_bss_channel(wiphy, ie, ielen, rx_channel); + channel = cfg80211_get_bss_channel(wiphy, ie, ielen, data->chan); if (!channel) return NULL; memcpy(tmp.pub.bssid, bssid, ETH_ALEN); tmp.pub.channel = channel; - tmp.pub.scan_width = scan_width; - tmp.pub.signal = signal; + tmp.pub.scan_width = data->scan_width; + tmp.pub.signal = data->signal; tmp.pub.beacon_interval = beacon_interval; tmp.pub.capability = capability; + tmp.ts_boottime = data->boottime_ns; + /* * If we do not know here whether the IEs are from a Beacon or Probe * Response frame, we need to pick one of the options and only use it @@ -999,7 +1000,7 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, } rcu_assign_pointer(tmp.pub.ies, ies); - signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + signal_valid = abs(data->chan->center_freq - channel->center_freq) <= wiphy->max_adj_channel_rssi_comp; res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) @@ -1019,15 +1020,15 @@ cfg80211_inform_bss_width(struct wiphy *wiphy, /* cfg80211_bss_update gives us a referenced result */ return &res->pub; } 
-EXPORT_SYMBOL(cfg80211_inform_bss_width); +EXPORT_SYMBOL(cfg80211_inform_bss_data); -/* Returned bss is reference counted and must be cleaned up appropriately. */ +/* cfg80211_inform_bss_width_frame helper */ struct cfg80211_bss * -cfg80211_inform_bss_width_frame(struct wiphy *wiphy, - struct ieee80211_channel *rx_channel, - enum nl80211_bss_scan_width scan_width, - struct ieee80211_mgmt *mgmt, size_t len, - s32 signal, gfp_t gfp) +cfg80211_inform_bss_frame_data(struct wiphy *wiphy, + struct cfg80211_inform_bss *data, + struct ieee80211_mgmt *mgmt, size_t len, + gfp_t gfp) + { struct cfg80211_internal_bss tmp = {}, *res; struct cfg80211_bss_ies *ies; @@ -1040,8 +1041,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, BUILD_BUG_ON(offsetof(struct ieee80211_mgmt, u.probe_resp.variable) != offsetof(struct ieee80211_mgmt, u.beacon.variable)); - trace_cfg80211_inform_bss_width_frame(wiphy, rx_channel, scan_width, mgmt, - len, signal); + trace_cfg80211_inform_bss_frame(wiphy, data, mgmt, len); if (WARN_ON(!mgmt)) return NULL; @@ -1050,14 +1050,14 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, return NULL; if (WARN_ON(wiphy->signal_type == CFG80211_SIGNAL_TYPE_UNSPEC && - (signal < 0 || signal > 100))) + (data->signal < 0 || data->signal > 100))) return NULL; if (WARN_ON(len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) return NULL; channel = cfg80211_get_bss_channel(wiphy, mgmt->u.beacon.variable, - ielen, rx_channel); + ielen, data->chan); if (!channel) return NULL; @@ -1077,12 +1077,13 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, memcpy(tmp.pub.bssid, mgmt->bssid, ETH_ALEN); tmp.pub.channel = channel; - tmp.pub.scan_width = scan_width; - tmp.pub.signal = signal; + tmp.pub.scan_width = data->scan_width; + tmp.pub.signal = data->signal; tmp.pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); tmp.pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); + tmp.ts_boottime = data->boottime_ns; - signal_valid = abs(rx_channel->center_freq - channel->center_freq) <= + signal_valid = abs(data->chan->center_freq - channel->center_freq) <= wiphy->max_adj_channel_rssi_comp; res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid); if (!res) @@ -1102,7 +1103,7 @@ cfg80211_inform_bss_width_frame(struct wiphy *wiphy, /* cfg80211_bss_update gives us a referenced result */ return &res->pub; } -EXPORT_SYMBOL(cfg80211_inform_bss_width_frame); +EXPORT_SYMBOL(cfg80211_inform_bss_frame_data); void cfg80211_ref_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) { diff --git a/net/wireless/trace.h b/net/wireless/trace.h index a808279a432a..0c392d36781b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2670,30 +2670,30 @@ TRACE_EVENT(cfg80211_get_bss, __entry->privacy) ); -TRACE_EVENT(cfg80211_inform_bss_width_frame, - TP_PROTO(struct wiphy *wiphy, struct ieee80211_channel *channel, - enum nl80211_bss_scan_width scan_width, - struct ieee80211_mgmt *mgmt, size_t len, - s32 signal), - TP_ARGS(wiphy, channel, scan_width, mgmt, len, signal), +TRACE_EVENT(cfg80211_inform_bss_frame, + TP_PROTO(struct wiphy *wiphy, struct cfg80211_inform_bss *data, + struct ieee80211_mgmt *mgmt, size_t len), + TP_ARGS(wiphy, data, mgmt, len), TP_STRUCT__entry( WIPHY_ENTRY CHAN_ENTRY __field(enum nl80211_bss_scan_width, scan_width) __dynamic_array(u8, mgmt, len) __field(s32, signal) + __field(u64, ts_boottime) ), TP_fast_assign( WIPHY_ASSIGN; - CHAN_ASSIGN(channel); - __entry->scan_width = scan_width; + CHAN_ASSIGN(data->chan); + __entry->scan_width = 
data->scan_width; if (mgmt) memcpy(__get_dynamic_array(mgmt), mgmt, len); - __entry->signal = signal; + __entry->signal = data->signal; + __entry->ts_boottime = data->boottime_ns; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d", + TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT "(scan_width: %d) signal: %d, tsb:%llu", WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width, - __entry->signal) + __entry->signal, (unsigned long long)__entry->ts_boottime) ); DECLARE_EVENT_CLASS(cfg80211_bss_evt, diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 60ce7014e1b0..ad7f5b3f9b61 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -330,8 +330,10 @@ resume: if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); - if (inner_mode == NULL) + if (inner_mode == NULL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; + } } if (inner_mode->input(x, skb)) { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 68ada2ca4b60..cc3676eb6239 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -19,7 +19,7 @@ #include <net/dst.h> #include <net/xfrm.h> -static int xfrm_output2(struct sock *sk, struct sk_buff *skb); +static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -131,18 +131,20 @@ out: int xfrm_output_resume(struct sk_buff *skb, int err) { + struct net *net = xs_net(skb_dst(skb)->xfrm); + while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset(skb); - err = skb_dst(skb)->ops->local_out(skb); + err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); if (unlikely(err != 1)) goto out; if (!skb_dst(skb)->xfrm) - return dst_output(skb); + return dst_output(net, skb->sk, skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, skb->sk, skb, + NF_INET_POST_ROUTING, net, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -156,12 +158,12 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); -static int xfrm_output2(struct sock *sk, struct sk_buff *skb) +static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } -static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) +static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; @@ -177,7 +179,7 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) int err; segs->next = NULL; - err = xfrm_output2(sk, segs); + err = xfrm_output2(net, sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); @@ -196,7 +198,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) int err; if (skb_is_gso(skb)) - return xfrm_output_gso(sk, skb); + return xfrm_output_gso(net, sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); @@ -207,7 +209,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) } } - return xfrm_output2(sk, skb); + return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 94af3d065785..09bfcbac63bb 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1208,7 +1208,7 @@ static inline int policy_to_flow_dir(int dir) } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, +static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; @@ -1583,8 +1583,6 @@ static inline 
struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; - if (afinfo->init_dst) - afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); @@ -1889,6 +1887,7 @@ static void xfrm_policy_queue_process(unsigned long arg) struct sock *sk; struct dst_entry *dst; struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct net *net = xp_net(pol); struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; struct sk_buff_head list; @@ -1905,8 +1904,7 @@ static void xfrm_policy_queue_process(unsigned long arg) spin_unlock(&pq->hold_queue.lock); dst_hold(dst->path); - dst = xfrm_lookup(xp_net(pol), dst->path, &fl, - sk, 0); + dst = xfrm_lookup(net, dst->path, &fl, sk, 0); if (IS_ERR(dst)) goto purge_queue; @@ -1936,8 +1934,7 @@ static void xfrm_policy_queue_process(unsigned long arg) xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); dst_hold(skb_dst(skb)->path); - dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, - &fl, skb->sk, 0); + dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0); if (IS_ERR(dst)) { kfree_skb(skb); continue; @@ -1947,7 +1944,7 @@ static void xfrm_policy_queue_process(unsigned long arg) skb_dst_drop(skb); skb_dst_set(skb, dst); - dst_output(skb); + dst_output(net, skb->sk, skb); } out: @@ -1960,7 +1957,7 @@ purge_queue: xfrm_pol_put(pol); } -static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) +static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); @@ -2187,7 +2184,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family, */ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, - struct sock *sk, int flags) + const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct flow_cache_object *flo; @@ -2335,7 +2332,7 @@ EXPORT_SYMBOL(xfrm_lookup); */ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, - struct sock *sk, int flags) + const struct sock *sk, int flags) { struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags | XFRM_LOOKUP_QUEUE | diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 24e06a2377f6..805681a7d356 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -31,6 +31,7 @@ #if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #endif +#include <asm/unaligned.h> static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) { @@ -728,7 +729,9 @@ static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); - memcpy(&p->stats, &x->stats, sizeof(p->stats)); + put_unaligned(x->stats.replay_window, &p->stats.replay_window); + put_unaligned(x->stats.replay, &p->stats.replay); + put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed); memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window;
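/*
 * Editor's note on the final xfrm_user.c hunk: copy_to_user_state()
 * stops memcpy()ing the whole of x->stats and stores each counter with
 * put_unaligned() instead. The stats block of struct xfrm_usersa_info
 * sits inside a netlink payload whose natural alignment is not
 * guaranteed, and a constant-size memcpy can be inlined into wide
 * stores that trap on strict-alignment architectures; put_unaligned()
 * degrades to byte-safe accesses where the CPU requires it. Minimal
 * sketch of the idiom (copy_stats() is a hypothetical helper):
 */
#include <linux/xfrm.h>
#include <asm/unaligned.h>

static void copy_stats(const struct xfrm_stats *src,
		       struct xfrm_stats *dst)
{
	/* dst may sit at any byte offset inside a netlink message */
	put_unaligned(src->replay_window, &dst->replay_window);
	put_unaligned(src->replay, &dst->replay);
	put_unaligned(src->integrity_failed, &dst->integrity_failed);
}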