From 38516ab59fbc5b3bb278cf5e1fe2867c70cff32e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 20 Apr 2010 17:04:50 -0400
Subject: tracing: Let tracepoints have data passed to tracepoint callbacks

This patch adds data to be passed to tracepoint callbacks.

The created functions from DECLARE_TRACE() now need a mandatory data
parameter. For example:

DECLARE_TRACE(mytracepoint, int value, value)

Will create the register function:

int register_trace_mytracepoint((void(*)(void *data, int value))probe,
                                void *data);

As the first argument, all callbacks (probes) must take a (void *data)
parameter. So a callback for the above tracepoint will look like:

void myprobe(void *data, int value)
{
}

The callback may choose to ignore the data parameter.

This change allows callbacks to register a private data pointer along
with the function probe.

	void mycallback(void *data, int value);

	register_trace_mytracepoint(mycallback, mydata);

Then the mycallback() will receive the "mydata" as the first parameter
before the args.

A more detailed example:

  DECLARE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status));

  /* In the C file */

  DEFINE_TRACE(mytracepoint, TP_PROTO(int status), TP_ARGS(status));

  [...]

       trace_mytracepoint(status);

  /* In a file registering this tracepoint */

  int my_callback(void *data, int status)
  {
	struct my_struct my_data = data;
	[...]
  }

  [...]
	my_data = kmalloc(sizeof(*my_data), GFP_KERNEL);
	init_my_data(my_data);
	register_trace_mytracepoint(my_callback, my_data);

The same callback can also be registered to the same tracepoint as long
as the data registered is different. Note, the data must also be used
to unregister the callback:

	unregister_trace_mytracepoint(my_callback, my_data);

Because of the data parameter, tracepoints declared this way can not have
no args. That is:

  DECLARE_TRACE(mytracepoint, TP_PROTO(void), TP_ARGS());

will cause an error.

If no arguments are needed, a new macro can be used instead:

  DECLARE_TRACE_NOARGS(mytracepoint);

Since there are no arguments, the proto and args fields are left out.

This is part of a series to make the tracepoint footprint smaller:

   text	   data	    bss	    dec	    hex	filename
4913961	1088356	 861512	6863829	 68bbd5	vmlinux.orig
4914025	1088868	 861512	6864405	 68be15	vmlinux.class
4918492	1084612	 861512	6864616	 68bee8	vmlinux.tracepoint

Again, this patch also increases the size of the kernel, but
lays the ground work for decreasing it.

 v5: Fixed net/core/drop_monitor.c to handle these updates.

 v4: Moved the DECLARE_TRACE() DECLARE_TRACE_NOARGS out of the
     #ifdef CONFIG_TRACE_POINTS, since the two are the same in both
     cases. The __DECLARE_TRACE() is what changes.
     Thanks to Frederic Weisbecker for pointing this out.

 v3: Made all register_* functions require data to be passed and
     all callbacks to take a void * parameter as its first argument.
     This makes the calling functions comply with C standards.

     Also added more comments to the modifications of DECLARE_TRACE().

 v2: Made the DECLARE_TRACE() have the ability to pass arguments
     and added a new DECLARE_TRACE_NOARGS() for tracepoints that
     do not need any arguments.

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Acked-by: Masami Hiramatsu <mhiramat@redhat.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 net/core/drop_monitor.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cf208d8042b1..ad41529fb60f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -172,12 +172,12 @@ out:
 	return;
 }
 
-static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location)
 {
 	trace_drop_common(skb, location);
 }
 
-static void trace_napi_poll_hit(struct napi_struct *napi)
+static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi)
 {
 	struct dm_hw_stat_delta *new_stat;
 
@@ -225,12 +225,12 @@ static int set_all_monitor_traces(int state)
 
 	switch (state) {
 	case TRACE_ON:
-		rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
-		rc |= register_trace_napi_poll(trace_napi_poll_hit);
+		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
 		break;
 	case TRACE_OFF:
-		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
-		rc |= unregister_trace_napi_poll(trace_napi_poll_hit);
+		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
 
 		tracepoint_synchronize_unregister();
 
-- 
cgit v1.2.3-58-ga151


From dc3f5e68f846eec38fb31d78f0b6e83633ad375e Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 13 Apr 2010 16:11:50 +0300
Subject: trans_virtio: use virtqueue_xxx wrappers

Switch trans_virtio to new virtqueue_xxx wrappers.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 net/9p/trans_virtio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7eb78ecc1618..dcfbe99ff81c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -137,7 +137,7 @@ static void req_done(struct virtqueue *vq)
 
 	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
 
-	while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) {
+	while ((rc = virtqueue_get_buf(chan->vq, &len)) != NULL) {
 		P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
 		P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
 		req = p9_tag_lookup(chan->client, rc->tag);
@@ -209,13 +209,13 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 
 	req->status = REQ_STATUS_SENT;
 
-	if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
+	if (virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc) < 0) {
 		P9_DPRINTK(P9_DEBUG_TRANS,
 			"9p debug: virtio rpc add_buf returned failure");
 		return -EIO;
 	}
 
-	chan->vq->vq_ops->kick(chan->vq);
+	virtqueue_kick(chan->vq);
 
 	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
 	return 0;
-- 
cgit v1.2.3-58-ga151


From 2c3c8bea608866d8bd9dcf92657d57fdcac011c5 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Wed, 12 May 2010 18:28:57 -0700
Subject: sysfs: add struct file* to bin_attr callbacks

This allows bin_attr->read,write,mmap callbacks to check file specific data
(such as inode owner) as part of any privilege validation.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/alpha/kernel/pci-sysfs.c        |  8 ++++---
 arch/mips/txx9/generic/setup.c       |  4 ++--
 arch/powerpc/sysdev/mv64x60_pci.c    |  4 ++--
 arch/s390/kernel/ipl.c               | 14 ++++++------
 drivers/acpi/system.c                |  2 +-
 drivers/base/firmware_class.c        | 11 ++++++----
 drivers/firmware/dcdbas.c            |  4 ++--
 drivers/firmware/dell_rbu.c          | 10 ++++-----
 drivers/firmware/efivars.c           |  4 ++--
 drivers/gpu/drm/drm_sysfs.c          |  5 +++--
 drivers/misc/c2port/core.c           |  4 ++--
 drivers/misc/ds1682.c                |  6 ++++--
 drivers/misc/eeprom/at24.c           |  6 ++++--
 drivers/misc/eeprom/at25.c           |  6 ++++--
 drivers/misc/eeprom/eeprom.c         |  3 ++-
 drivers/misc/eeprom/max6875.c        |  2 +-
 drivers/net/netxen/netxen_nic_main.c | 11 ++++++----
 drivers/net/qlcnic/qlcnic_main.c     | 12 +++++++----
 drivers/pci/hotplug/acpiphp_ibm.c    |  5 +++--
 drivers/pci/pci-sysfs.c              | 42 +++++++++++++++++++++++++-----------
 drivers/pcmcia/cistpl.c              |  4 ++--
 drivers/power/olpc_battery.c         |  2 +-
 drivers/rapidio/rio-sysfs.c          |  6 ++++--
 drivers/rtc/rtc-cmos.c               |  6 ++++--
 drivers/rtc/rtc-ds1305.c             |  6 ++++--
 drivers/rtc/rtc-ds1307.c             |  6 ++++--
 drivers/rtc/rtc-ds1511.c             | 10 +++++----
 drivers/rtc/rtc-ds1553.c             |  4 ++--
 drivers/rtc/rtc-ds1742.c             |  4 ++--
 drivers/rtc/rtc-m48t59.c             |  4 ++--
 drivers/rtc/rtc-stk17ta8.c           |  4 ++--
 drivers/rtc/rtc-tx4939.c             |  4 ++--
 drivers/s390/cio/chp.c               |  5 +++--
 drivers/scsi/3w-sas.c                |  4 ++--
 drivers/scsi/arcmsr/arcmsr_attr.c    |  9 +++++---
 drivers/scsi/ibmvscsi/ibmvfc.c       |  3 ++-
 drivers/scsi/ipr.c                   |  9 +++++---
 drivers/scsi/lpfc/lpfc_attr.c        | 20 ++++++++++++-----
 drivers/scsi/qla2xxx/qla_attr.c      | 32 +++++++++++++--------------
 drivers/staging/udlfb/udlfb.c        |  3 ++-
 drivers/usb/core/sysfs.c             |  3 ++-
 drivers/video/aty/radeon_base.c      |  4 ++--
 drivers/w1/slaves/w1_ds2431.c        |  4 ++--
 drivers/w1/slaves/w1_ds2433.c        |  4 ++--
 drivers/w1/slaves/w1_ds2760.c        |  2 +-
 drivers/w1/w1.c                      |  4 ++--
 drivers/zorro/zorro-sysfs.c          |  2 +-
 fs/sysfs/bin.c                       | 24 ++++++++++-----------
 include/linux/sysfs.h                |  7 +++---
 kernel/ksysfs.c                      |  3 ++-
 kernel/module.c                      |  2 +-
 net/bridge/br_sysfs_br.c             |  2 +-
 52 files changed, 220 insertions(+), 149 deletions(-)

(limited to 'net')

diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index d979e7c7bc4b..a5fffc882c72 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -53,6 +53,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
 
 /**
  * pci_mmap_resource - map a PCI resource into user memory space
+ * @filp: open sysfs file
  * @kobj: kobject for mapping
  * @attr: struct bin_attribute for the file being mapped
  * @vma: struct vm_area_struct passed into the mmap
@@ -60,7 +61,8 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
  *
  * Use the bus mapping routines to map a PCI resource into userspace.
  */
-static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+static int pci_mmap_resource(struct file *filp, struct kobject *kobj,
+			     struct bin_attribute *attr,
 			     struct vm_area_struct *vma, int sparse)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
@@ -89,14 +91,14 @@ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse);
 }
 
-static int pci_mmap_resource_sparse(struct kobject *kobj,
+static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *attr,
 				    struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 1);
 }
 
-static int pci_mmap_resource_dense(struct kobject *kobj,
+static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *attr,
 				   struct vm_area_struct *vma)
 {
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index adc69291f9e2..575d219b8001 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -905,7 +905,7 @@ struct txx9_sramc_sysdev {
 	void __iomem *base;
 };
 
-static ssize_t txx9_sram_read(struct kobject *kobj,
+static ssize_t txx9_sram_read(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t pos, size_t size)
 {
@@ -920,7 +920,7 @@ static ssize_t txx9_sram_read(struct kobject *kobj,
 	return size;
 }
 
-static ssize_t txx9_sram_write(struct kobject *kobj,
+static ssize_t txx9_sram_write(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t pos, size_t size)
 {
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index 1456015a22d8..198f288570cc 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -24,7 +24,7 @@
 #define MV64X60_VAL_LEN_MAX		11
 #define MV64X60_PCICFG_CPCI_HOTSWAP	0x68
 
-static ssize_t mv64x60_hs_reg_read(struct kobject *kobj,
+static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *attr, char *buf,
 				   loff_t off, size_t count)
 {
@@ -45,7 +45,7 @@ static ssize_t mv64x60_hs_reg_read(struct kobject *kobj,
 	return sprintf(buf, "0x%08x\n", v);
 }
 
-static ssize_t mv64x60_hs_reg_write(struct kobject *kobj,
+static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *attr, char *buf,
 				    loff_t off, size_t count)
 {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 72c8b0d070c8..a689070be287 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -403,8 +403,9 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
 static struct kobj_attribute sys_ipl_device_attr =
 	__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
 
-static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr,
-				  char *buf, loff_t off, size_t count)
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr, char *buf,
+				  loff_t off, size_t count)
 {
 	return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
 					IPL_PARMBLOCK_SIZE);
@@ -419,8 +420,9 @@ static struct bin_attribute ipl_parameter_attr = {
 	.read = &ipl_parameter_read,
 };
 
-static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *attr,
-				 char *buf, loff_t off, size_t count)
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
 {
 	unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
 	void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
@@ -694,7 +696,7 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
 
 /* FCP reipl device attributes */
 
-static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj,
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
 				      struct bin_attribute *attr,
 				      char *buf, loff_t off, size_t count)
 {
@@ -704,7 +706,7 @@ static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj,
 	return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
 
-static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj,
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
 				       struct bin_attribute *attr,
 				       char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index e35525b39f6b..c79e789ed03a 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -71,7 +71,7 @@ struct acpi_table_attr {
 	struct list_head node;
 };
 
-static ssize_t acpi_table_show(struct kobject *kobj,
+static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr, char *buf,
 			       loff_t offset, size_t count)
 {
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index d98e424675cf..3f093b0dd217 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -278,8 +278,9 @@ static ssize_t firmware_loading_store(struct device *dev,
 static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store);
 
 static ssize_t
-firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
-		   char *buffer, loff_t offset, size_t count)
+firmware_data_read(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr, char *buffer, loff_t offset,
+		   size_t count)
 {
 	struct device *dev = to_dev(kobj);
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
@@ -362,6 +363,7 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 
 /**
  * firmware_data_write - write method for firmware
+ * @filp: open sysfs file
  * @kobj: kobject for the device
  * @bin_attr: bin_attr structure
  * @buffer: buffer being written
@@ -372,8 +374,9 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
  *	the driver as a firmware image.
  **/
 static ssize_t
-firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr,
-		    char *buffer, loff_t offset, size_t count)
+firmware_data_write(struct file* filp, struct kobject *kobj,
+		    struct bin_attribute *bin_attr, char *buffer,
+		    loff_t offset, size_t count)
 {
 	struct device *dev = to_dev(kobj);
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index fb09bb3c0ad6..aa9bc9e980e1 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -149,7 +149,7 @@ static ssize_t smi_data_buf_size_store(struct device *dev,
 	return count;
 }
 
-static ssize_t smi_data_read(struct kobject *kobj,
+static ssize_t smi_data_read(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
@@ -162,7 +162,7 @@ static ssize_t smi_data_read(struct kobject *kobj,
 	return ret;
 }
 
-static ssize_t smi_data_write(struct kobject *kobj,
+static ssize_t smi_data_write(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t pos, size_t count)
 {
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 3a4460265b10..2f452f1f7c8a 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -522,7 +522,7 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
 			rbu_data.image_update_buffer, rbu_data.bios_image_size);
 }
 
-static ssize_t read_rbu_data(struct kobject *kobj,
+static ssize_t read_rbu_data(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buffer, loff_t pos, size_t count)
 {
@@ -576,7 +576,7 @@ static void callbackfn_rbu(const struct firmware *fw, void *context)
 	release_firmware(fw);
 }
 
-static ssize_t read_rbu_image_type(struct kobject *kobj,
+static ssize_t read_rbu_image_type(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t pos, size_t count)
 {
@@ -586,7 +586,7 @@ static ssize_t read_rbu_image_type(struct kobject *kobj,
 	return size;
 }
 
-static ssize_t write_rbu_image_type(struct kobject *kobj,
+static ssize_t write_rbu_image_type(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *bin_attr,
 				    char *buffer, loff_t pos, size_t count)
 {
@@ -647,7 +647,7 @@ static ssize_t write_rbu_image_type(struct kobject *kobj,
 	return rc;
 }
 
-static ssize_t read_rbu_packet_size(struct kobject *kobj,
+static ssize_t read_rbu_packet_size(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *bin_attr,
 				    char *buffer, loff_t pos, size_t count)
 {
@@ -660,7 +660,7 @@ static ssize_t read_rbu_packet_size(struct kobject *kobj,
 	return size;
 }
 
-static ssize_t write_rbu_packet_size(struct kobject *kobj,
+static ssize_t write_rbu_packet_size(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *bin_attr,
 				     char *buffer, loff_t pos, size_t count)
 {
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 81b70bd07586..2a62ec6390e0 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -402,7 +402,7 @@ efivar_unregister(struct efivar_entry *var)
 }
 
 
-static ssize_t efivar_create(struct kobject *kobj,
+static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
@@ -461,7 +461,7 @@ static ssize_t efivar_create(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t efivar_delete(struct kobject *kobj,
+static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 25bbd30ed7af..387166d5a109 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -193,8 +193,9 @@ static ssize_t enabled_show(struct device *device,
 			"disabled");
 }
 
-static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *attr,
-			 char *buf, loff_t off, size_t count)
+static ssize_t edid_show(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *attr, char *buf, loff_t off,
+			 size_t count)
 {
 	struct device *connector_dev = container_of(kobj, struct device, kobj);
 	struct drm_connector *connector = to_drm_connector(connector_dev);
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
index ed090e77c9cd..19fc7c1cb428 100644
--- a/drivers/misc/c2port/core.c
+++ b/drivers/misc/c2port/core.c
@@ -707,7 +707,7 @@ static ssize_t __c2port_read_flash_data(struct c2port_device *dev,
 	return nread;
 }
 
-static ssize_t c2port_read_flash_data(struct kobject *kobj,
+static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr,
 				char *buffer, loff_t offset, size_t count)
 {
@@ -824,7 +824,7 @@ static ssize_t __c2port_write_flash_data(struct c2port_device *dev,
 	return nwrite;
 }
 
-static ssize_t c2port_write_flash_data(struct kobject *kobj,
+static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr,
 				char *buffer, loff_t offset, size_t count)
 {
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
index 9197cfc55015..a513f0aa6432 100644
--- a/drivers/misc/ds1682.c
+++ b/drivers/misc/ds1682.c
@@ -140,7 +140,8 @@ static const struct attribute_group ds1682_group = {
 /*
  * User data attribute
  */
-static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr,
 				  char *buf, loff_t off, size_t count)
 {
 	struct i2c_client *client = kobj_to_i2c_client(kobj);
@@ -163,7 +164,8 @@ static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *at
 	return count;
 }
 
-static ssize_t ds1682_eeprom_write(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *attr,
 				   char *buf, loff_t off, size_t count)
 {
 	struct i2c_client *client = kobj_to_i2c_client(kobj);
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index db7d0f21b65d..a79a62f75481 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -274,7 +274,8 @@ static ssize_t at24_read(struct at24_data *at24,
 	return retval;
 }
 
-static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct at24_data *at24;
@@ -395,7 +396,8 @@ static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
 	return retval;
 }
 
-static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct at24_data *at24;
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index d194212a41f6..c627e4174ccd 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -126,7 +126,8 @@ at25_ee_read(
 }
 
 static ssize_t
-at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+at25_bin_read(struct file *filp, struct kobject *kobj,
+	      struct bin_attribute *bin_attr,
 	      char *buf, loff_t off, size_t count)
 {
 	struct device		*dev;
@@ -253,7 +254,8 @@ at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
 }
 
 static ssize_t
-at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+at25_bin_write(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *bin_attr,
 	       char *buf, loff_t off, size_t count)
 {
 	struct device		*dev;
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index e306a8cd2f96..45060ddc4e59 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -81,7 +81,8 @@ exit:
 	mutex_unlock(&data->update_lock);
 }
 
-static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+static ssize_t eeprom_read(struct file *filp, struct kobject *kobj,
+			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
 	struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj));
diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
index fe2909278507..5653a3ce0517 100644
--- a/drivers/misc/eeprom/max6875.c
+++ b/drivers/misc/eeprom/max6875.c
@@ -107,7 +107,7 @@ exit_up:
 	mutex_unlock(&data->update_lock);
 }
 
-static ssize_t max6875_read(struct kobject *kobj,
+static ssize_t max6875_read(struct file *filp, struct kobject *kobj,
 			    struct bin_attribute *bin_attr,
 			    char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index c61a61f177b7..6ce6ce1df6d2 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -2560,7 +2560,8 @@ netxen_sysfs_validate_crb(struct netxen_adapter *adapter,
 }
 
 static ssize_t
-netxen_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
+netxen_sysfs_read_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2587,7 +2588,8 @@ netxen_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-netxen_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr,
+netxen_sysfs_write_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2627,7 +2629,8 @@ netxen_sysfs_validate_mem(struct netxen_adapter *adapter,
 }
 
 static ssize_t
-netxen_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
+netxen_sysfs_read_mem(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2647,7 +2650,7 @@ netxen_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
 	return size;
 }
 
-static ssize_t netxen_sysfs_write_mem(struct kobject *kobj,
+static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *attr, char *buf,
 		loff_t offset, size_t size)
 {
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 1003eb76fda3..23ea9caa5261 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2464,7 +2464,8 @@ qlcnic_sysfs_validate_crb(struct qlcnic_adapter *adapter,
 }
 
 static ssize_t
-qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_read_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2488,7 +2489,8 @@ qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-qlcnic_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_write_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2525,7 +2527,8 @@ qlcnic_sysfs_validate_mem(struct qlcnic_adapter *adapter,
 }
 
 static ssize_t
-qlcnic_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_read_mem(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2546,7 +2549,8 @@ qlcnic_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-qlcnic_sysfs_write_mem(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_write_mem(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 6ecbfb27db9d..e525263210ee 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -108,7 +108,7 @@ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status);
 static int ibm_get_attention_status(struct hotplug_slot *slot, u8 *status);
 static void ibm_handle_events(acpi_handle handle, u32 event, void *context);
 static int ibm_get_table_from_acpi(char **bufp);
-static ssize_t ibm_read_apci_table(struct kobject *kobj,
+static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t pos, size_t size);
 static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
@@ -351,6 +351,7 @@ read_table_done:
 
 /**
  * ibm_read_apci_table - callback for the sysfs apci_table file
+ * @filp: the open sysfs file
  * @kobj: the kobject this binary attribute is a part of
  * @bin_attr: struct bin_attribute for this file
  * @buffer: the kernel space buffer to fill
@@ -364,7 +365,7 @@ read_table_done:
  * things get really tricky here...
  * our solution is to only allow reading the table in all at once.
  */
-static ssize_t ibm_read_apci_table(struct kobject *kobj,
+static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t pos, size_t size)
 {
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index fad93983bfed..ad44557e65c4 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -357,7 +357,8 @@ boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
 struct device_attribute vga_attr = __ATTR_RO(boot_vga);
 
 static ssize_t
-pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_read_config(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
@@ -430,7 +431,8 @@ pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_write_config(struct file* filp, struct kobject *kobj,
+		 struct bin_attribute *bin_attr,
 		 char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
@@ -487,7 +489,8 @@ pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
+read_vpd_attr(struct file *filp, struct kobject *kobj,
+	      struct bin_attribute *bin_attr,
 	      char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev =
@@ -502,7 +505,8 @@ read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
+write_vpd_attr(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *bin_attr,
 	       char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev =
@@ -519,6 +523,7 @@ write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
 #ifdef HAVE_PCI_LEGACY
 /**
  * pci_read_legacy_io - read byte(s) from legacy I/O port space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to file to read from
  * @bin_attr: struct bin_attribute for this file
  * @buf: buffer to store results
@@ -529,7 +534,8 @@ write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
  * callback routine (pci_legacy_read).
  */
 static ssize_t
-pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_read_legacy_io(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -545,6 +551,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * pci_write_legacy_io - write byte(s) to legacy I/O port space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to file to read from
  * @bin_attr: struct bin_attribute for this file
  * @buf: buffer containing value to be written
@@ -555,7 +562,8 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * callback routine (pci_legacy_write).
  */
 static ssize_t
-pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_write_legacy_io(struct file *filp, struct kobject *kobj,
+		    struct bin_attribute *bin_attr,
 		    char *buf, loff_t off, size_t count)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -570,6 +578,7 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * pci_mmap_legacy_mem - map legacy PCI memory into user memory space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to device to be mapped
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
@@ -579,7 +588,8 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * memory space.
  */
 static int
-pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_legacy_mem(struct file *filp, struct kobject *kobj,
+		    struct bin_attribute *attr,
                     struct vm_area_struct *vma)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -591,6 +601,7 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
 
 /**
  * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to device to be mapped
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
@@ -600,7 +611,8 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
  * memory space. Returns -ENOSYS if the operation isn't supported
  */
 static int
-pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_legacy_io(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *attr,
 		   struct vm_area_struct *vma)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -750,14 +762,16 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static int
-pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_resource_uc(struct file *filp, struct kobject *kobj,
+		     struct bin_attribute *attr,
 		     struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 0);
 }
 
 static int
-pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_resource_wc(struct file *filp, struct kobject *kobj,
+		     struct bin_attribute *attr,
 		     struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 1);
@@ -861,6 +875,7 @@ void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
 
 /**
  * pci_write_rom - used to enable access to the PCI ROM display
+ * @filp: sysfs file
  * @kobj: kernel object handle
  * @bin_attr: struct bin_attribute for this file
  * @buf: user input
@@ -870,7 +885,8 @@ void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
  * writing anything except 0 enables it
  */
 static ssize_t
-pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_write_rom(struct file *filp, struct kobject *kobj,
+	      struct bin_attribute *bin_attr,
 	      char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
@@ -885,6 +901,7 @@ pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * pci_read_rom - read a PCI ROM
+ * @filp: sysfs file
  * @kobj: kernel object handle
  * @bin_attr: struct bin_attribute for this file
  * @buf: where to put the data we read from the ROM
@@ -895,7 +912,8 @@ pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
  * device corresponding to @kobj.
  */
 static ssize_t
-pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_read_rom(struct file *filp, struct kobject *kobj,
+	     struct bin_attribute *bin_attr,
 	     char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index 60d428be0b07..8844bc3e3118 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -1531,7 +1531,7 @@ static ssize_t pccard_extract_cis(struct pcmcia_socket *s, char *buf,
 }
 
 
-static ssize_t pccard_show_cis(struct kobject *kobj,
+static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -1562,7 +1562,7 @@ static ssize_t pccard_show_cis(struct kobject *kobj,
 }
 
 
-static ssize_t pccard_store_cis(struct kobject *kobj,
+static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 8fefe5a73558..baefcf1cffc9 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -354,7 +354,7 @@ static enum power_supply_property olpc_bat_props[] = {
 #define EEPROM_END	0x80
 #define EEPROM_SIZE	(EEPROM_END - EEPROM_START)
 
-static ssize_t olpc_bat_eeprom_read(struct kobject *kobj,
+static ssize_t olpc_bat_eeprom_read(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *attr, char *buf, loff_t off, size_t count)
 {
 	uint8_t ec_byte;
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index ba742e82c57d..00b475658356 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -68,7 +68,8 @@ struct device_attribute rio_dev_attrs[] = {
 };
 
 static ssize_t
-rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+rio_read_config(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct rio_dev *dev =
@@ -139,7 +140,8 @@ rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+rio_write_config(struct file *filp, struct kobject *kobj,
+		 struct bin_attribute *bin_attr,
 		 char *buf, loff_t off, size_t count)
 {
 	struct rio_dev *dev =
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index e9aa814ddd23..ece4dbddc0ea 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -519,7 +519,8 @@ static const struct rtc_class_ops cmos_rtc_ops = {
 #define NVRAM_OFFSET	(RTC_REG_D + 1)
 
 static ssize_t
-cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+cmos_nvram_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	int	retval;
@@ -547,7 +548,8 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+cmos_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct cmos_rtc	*cmos;
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 7836c9cec557..48da85e97ca4 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -542,7 +542,8 @@ static void msg_init(struct spi_message *m, struct spi_transfer *x,
 }
 
 static ssize_t
-ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+ds1305_nvram_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct spi_device	*spi;
@@ -572,7 +573,8 @@ ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+ds1305_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct spi_device	*spi;
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index c4ec5c158aa1..de033b7ac21f 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -556,7 +556,8 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 #define NVRAM_SIZE	56
 
 static ssize_t
-ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+ds1307_nvram_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct i2c_client	*client;
@@ -580,7 +581,8 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+ds1307_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct i2c_client	*client;
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 06b8566c4532..37268e97de49 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -423,8 +423,9 @@ static const struct rtc_class_ops ds1511_rtc_ops = {
 };
 
  static ssize_t
-ds1511_nvram_read(struct kobject *kobj, struct bin_attribute *ba,
-				char *buf, loff_t pos, size_t size)
+ds1511_nvram_read(struct file *filp, struct kobject *kobj,
+		  struct bin_attribute *ba,
+		  char *buf, loff_t pos, size_t size)
 {
 	ssize_t count;
 
@@ -452,8 +453,9 @@ ds1511_nvram_read(struct kobject *kobj, struct bin_attribute *ba,
 }
 
  static ssize_t
-ds1511_nvram_write(struct kobject *kobj, struct bin_attribute *bin_attr,
-				char *buf, loff_t pos, size_t size)
+ds1511_nvram_write(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr,
+		   char *buf, loff_t pos, size_t size)
 {
 	ssize_t count;
 
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 244f9994bcbb..ff432e2ca275 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -252,7 +252,7 @@ static const struct rtc_class_ops ds1553_rtc_ops = {
 	.update_irq_enable	= ds1553_rtc_update_irq_enable,
 };
 
-static ssize_t ds1553_nvram_read(struct kobject *kobj,
+static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t pos, size_t size)
 {
@@ -267,7 +267,7 @@ static ssize_t ds1553_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t ds1553_nvram_write(struct kobject *kobj,
+static ssize_t ds1553_nvram_write(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 2b4b0bc42d6f..042630c90dd3 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -128,7 +128,7 @@ static const struct rtc_class_ops ds1742_rtc_ops = {
 	.set_time	= ds1742_rtc_set_time,
 };
 
-static ssize_t ds1742_nvram_read(struct kobject *kobj,
+static ssize_t ds1742_nvram_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t pos, size_t size)
 {
@@ -143,7 +143,7 @@ static ssize_t ds1742_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t ds1742_nvram_write(struct kobject *kobj,
+static ssize_t ds1742_nvram_write(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index 365ff3ac2348..be8359fdb65a 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -343,7 +343,7 @@ static const struct rtc_class_ops m48t02_rtc_ops = {
 	.set_time	= m48t59_rtc_set_time,
 };
 
-static ssize_t m48t59_nvram_read(struct kobject *kobj,
+static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t pos, size_t size)
 {
@@ -363,7 +363,7 @@ static ssize_t m48t59_nvram_read(struct kobject *kobj,
 	return cnt;
 }
 
-static ssize_t m48t59_nvram_write(struct kobject *kobj,
+static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index b53a00198dbe..3b943673cd3e 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -244,7 +244,7 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
 	.alarm_irq_enable	= stk17ta8_rtc_alarm_irq_enable,
 };
 
-static ssize_t stk17ta8_nvram_read(struct kobject *kobj,
+static ssize_t stk17ta8_nvram_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *attr, char *buf,
 				 loff_t pos, size_t size)
 {
@@ -259,7 +259,7 @@ static ssize_t stk17ta8_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t stk17ta8_nvram_write(struct kobject *kobj,
+static ssize_t stk17ta8_nvram_write(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *attr, char *buf,
 				  loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index 20bfc64a15c8..ec6313d15359 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -188,7 +188,7 @@ static const struct rtc_class_ops tx4939_rtc_ops = {
 	.alarm_irq_enable	= tx4939_rtc_alarm_irq_enable,
 };
 
-static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj,
+static ssize_t tx4939_rtc_nvram_read(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *bin_attr,
 				     char *buf, loff_t pos, size_t size)
 {
@@ -207,7 +207,7 @@ static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t tx4939_rtc_nvram_write(struct kobject *kobj,
+static ssize_t tx4939_rtc_nvram_write(struct file *filp, struct kobject *kobj,
 				      struct bin_attribute *bin_attr,
 				      char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 1d16189f2f2d..6c9fa15aac7b 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -135,7 +135,8 @@ static int s390_vary_chpid(struct chp_id chpid, int on)
 /*
  * Channel measurement related functions
  */
-static ssize_t chp_measurement_chars_read(struct kobject *kobj,
+static ssize_t chp_measurement_chars_read(struct file *filp,
+					  struct kobject *kobj,
 					  struct bin_attribute *bin_attr,
 					  char *buf, loff_t off, size_t count)
 {
@@ -182,7 +183,7 @@ static void chp_measurement_copy_block(struct cmg_entry *buf,
 	} while (reference_buf.values[0] != buf->values[0]);
 }
 
-static ssize_t chp_measurement_read(struct kobject *kobj,
+static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *bin_attr,
 				    char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 54c5ffb1eaa1..d38000db9237 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -98,7 +98,7 @@ static int twl_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_res
 /* Functions */
 
 /* This function returns AENs through sysfs */
-static ssize_t twl_sysfs_aen_read(struct kobject *kobj,
+static ssize_t twl_sysfs_aen_read(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *outbuf, loff_t offset, size_t count)
 {
@@ -129,7 +129,7 @@ static struct bin_attribute twl_sysfs_aen_read_attr = {
 };
 
 /* This function returns driver compatibility info through sysfs */
-static ssize_t twl_sysfs_compat_info(struct kobject *kobj,
+static ssize_t twl_sysfs_compat_info(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *bin_attr,
 				     char *outbuf, loff_t offset, size_t count)
 {
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 5877f29a6005..a4e04c50c436 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -59,7 +59,8 @@
 
 struct device_attribute *arcmsr_host_attrs[];
 
-static ssize_t arcmsr_sysfs_iop_message_read(struct kobject *kobj,
+static ssize_t arcmsr_sysfs_iop_message_read(struct file *filp,
+					     struct kobject *kobj,
 					     struct bin_attribute *bin,
 					     char *buf, loff_t off,
 					     size_t count)
@@ -105,7 +106,8 @@ static ssize_t arcmsr_sysfs_iop_message_read(struct kobject *kobj,
 	return (allxfer_len);
 }
 
-static ssize_t arcmsr_sysfs_iop_message_write(struct kobject *kobj,
+static ssize_t arcmsr_sysfs_iop_message_write(struct file *filp,
+					      struct kobject *kobj,
 					      struct bin_attribute *bin,
 					      char *buf, loff_t off,
 					      size_t count)
@@ -153,7 +155,8 @@ static ssize_t arcmsr_sysfs_iop_message_write(struct kobject *kobj,
 	}
 }
 
-static ssize_t arcmsr_sysfs_iop_message_clear(struct kobject *kobj,
+static ssize_t arcmsr_sysfs_iop_message_clear(struct file *filp,
+					      struct kobject *kobj,
 					      struct bin_attribute *bin,
 					      char *buf, loff_t off,
 					      size_t count)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d18f45c95639..3eb2b7b3d8b0 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2919,6 +2919,7 @@ static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
 #ifdef CONFIG_SCSI_IBMVFC_TRACE
 /**
  * ibmvfc_read_trace - Dump the adapter trace
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:	bin_attribute struct
  * @buf:		buffer
@@ -2928,7 +2929,7 @@ static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ibmvfc_read_trace(struct kobject *kobj,
+static ssize_t ibmvfc_read_trace(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index b90c118119d7..6a6661c35b2f 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3120,6 +3120,7 @@ restart:
 #ifdef CONFIG_SCSI_IPR_TRACE
 /**
  * ipr_read_trace - Dump the adapter trace
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:		bin_attribute struct
  * @buf:		buffer
@@ -3129,7 +3130,7 @@ restart:
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ipr_read_trace(struct kobject *kobj,
+static ssize_t ipr_read_trace(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
@@ -3764,6 +3765,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
 #ifdef CONFIG_SCSI_IPR_DUMP
 /**
  * ipr_read_dump - Dump the adapter
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:		bin_attribute struct
  * @buf:		buffer
@@ -3773,7 +3775,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ipr_read_dump(struct kobject *kobj,
+static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t off, size_t count)
 {
@@ -3927,6 +3929,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg)
 
 /**
  * ipr_write_dump - Setup dump state of adapter
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:		bin_attribute struct
  * @buf:		buffer
@@ -3936,7 +3939,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg)
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ipr_write_dump(struct kobject *kobj,
+static ssize_t ipr_write_dump(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 2e5f376d9ccc..bf33b315f93e 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -2643,6 +2643,7 @@ static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR,
 
 /**
  * sysfs_drvr_stat_data_read - Read function for lpfc_drvr_stat_data attribute
+ * @filp: sysfs file
  * @kobj: Pointer to the kernel object
  * @bin_attr: Attribute object
  * @buff: Buffer pointer
@@ -2654,7 +2655,8 @@ static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR,
  * applications.
  **/
 static ssize_t
-sysfs_drvr_stat_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_drvr_stat_data_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device,
@@ -3362,6 +3364,7 @@ struct device_attribute *lpfc_vport_attrs[] = {
 
 /**
  * sysfs_ctlreg_write - Write method for writing to ctlreg
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: contains the data to be written to the adapter IOREG space.
@@ -3379,7 +3382,8 @@ struct device_attribute *lpfc_vport_attrs[] = {
  * value of count, buf contents written
  **/
 static ssize_t
-sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_ctlreg_write(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
 	size_t buf_off;
@@ -3415,6 +3419,7 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * sysfs_ctlreg_read - Read method for reading from ctlreg
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: if successful contains the data from the adapter IOREG space.
@@ -3431,7 +3436,8 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
  * value of count, buf contents read
  **/
 static ssize_t
-sysfs_ctlreg_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_ctlreg_read(struct file *filp, struct kobject *kobj,
+		  struct bin_attribute *bin_attr,
 		  char *buf, loff_t off, size_t count)
 {
 	size_t buf_off;
@@ -3496,6 +3502,7 @@ sysfs_mbox_idle(struct lpfc_hba *phba)
 
 /**
  * sysfs_mbox_write - Write method for writing information via mbox
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: contains the data to be written to sysfs mbox.
@@ -3516,7 +3523,8 @@ sysfs_mbox_idle(struct lpfc_hba *phba)
  * count number of bytes transferred
  **/
 static ssize_t
-sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_mbox_write(struct file *filp, struct kobject *kobj,
+		 struct bin_attribute *bin_attr,
 		 char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -3571,6 +3579,7 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * sysfs_mbox_read - Read method for reading information via mbox
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: contains the data to be read from sysfs mbox.
@@ -3593,7 +3602,8 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
  * count number of bytes transferred
  **/
 static ssize_t
-sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_mbox_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 3b708606b932..1e4cafabba15 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -16,7 +16,7 @@ static int qla24xx_vport_disable(struct fc_vport *, bool);
 /* SYSFS attributes --------------------------------------------------------- */
 
 static ssize_t
-qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
+qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
@@ -32,7 +32,7 @@ qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
+qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj,
 			    struct bin_attribute *bin_attr,
 			    char *buf, loff_t off, size_t count)
 {
@@ -92,7 +92,7 @@ static struct bin_attribute sysfs_fw_dump_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_nvram(struct kobject *kobj,
+qla2x00_sysfs_read_nvram(struct file *filp, struct kobject *kobj,
 			 struct bin_attribute *bin_attr,
 			 char *buf, loff_t off, size_t count)
 {
@@ -111,7 +111,7 @@ qla2x00_sysfs_read_nvram(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_nvram(struct kobject *kobj,
+qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj,
 			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
 {
@@ -177,7 +177,7 @@ static struct bin_attribute sysfs_nvram_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_optrom(struct kobject *kobj,
+qla2x00_sysfs_read_optrom(struct file *filp, struct kobject *kobj,
 			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
 {
@@ -193,7 +193,7 @@ qla2x00_sysfs_read_optrom(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_optrom(struct kobject *kobj,
+qla2x00_sysfs_write_optrom(struct file *filp, struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
@@ -224,7 +224,7 @@ static struct bin_attribute sysfs_optrom_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
+qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -387,7 +387,7 @@ static struct bin_attribute sysfs_optrom_ctl_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_vpd(struct kobject *kobj,
+qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
@@ -408,7 +408,7 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_vpd(struct kobject *kobj,
+qla2x00_sysfs_write_vpd(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -461,7 +461,7 @@ static struct bin_attribute sysfs_vpd_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_sfp(struct kobject *kobj,
+qla2x00_sysfs_read_sfp(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
@@ -522,7 +522,7 @@ static struct bin_attribute sysfs_sfp_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_reset(struct kobject *kobj,
+qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -592,7 +592,7 @@ static struct bin_attribute sysfs_reset_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_edc(struct kobject *kobj,
+qla2x00_sysfs_write_edc(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -650,7 +650,7 @@ static struct bin_attribute sysfs_edc_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_edc_status(struct kobject *kobj,
+qla2x00_sysfs_write_edc_status(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -700,7 +700,7 @@ qla2x00_sysfs_write_edc_status(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_read_edc_status(struct kobject *kobj,
+qla2x00_sysfs_read_edc_status(struct file *filp, struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
@@ -730,7 +730,7 @@ static struct bin_attribute sysfs_edc_status_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_xgmac_stats(struct kobject *kobj,
+qla2x00_sysfs_read_xgmac_stats(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
@@ -782,7 +782,7 @@ static struct bin_attribute sysfs_xgmac_stats_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_dcbx_tlv(struct kobject *kobj,
+qla2x00_sysfs_read_dcbx_tlv(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/staging/udlfb/udlfb.c b/drivers/staging/udlfb/udlfb.c
index aa8195199a2c..577f2bf6eb23 100644
--- a/drivers/staging/udlfb/udlfb.c
+++ b/drivers/staging/udlfb/udlfb.c
@@ -1063,7 +1063,8 @@ static ssize_t metrics_misc_show(struct device *fbdev,
 			atomic_read(&dev->lost_pixels) ? "yes" : "no");
 }
 
-static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *a,
+static ssize_t edid_show(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *a,
 			 char *buf, loff_t off, size_t count) {
 	struct device *fbdev = container_of(kobj, struct device, kobj);
 	struct fb_info *fb_info = dev_get_drvdata(fbdev);
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 06863befaf3a..448f5b47fc48 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -658,7 +658,8 @@ const struct attribute_group *usb_device_groups[] = {
 /* Binary descriptors */
 
 static ssize_t
-read_descriptors(struct kobject *kobj, struct bin_attribute *attr,
+read_descriptors(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 6c37e8ee5efe..3c1e13ed1cba 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -2099,7 +2099,7 @@ static ssize_t radeon_show_one_edid(char *buf, loff_t off, size_t count, const u
 }
 
 
-static ssize_t radeon_show_edid1(struct kobject *kobj,
+static ssize_t radeon_show_edid1(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
@@ -2112,7 +2112,7 @@ static ssize_t radeon_show_edid1(struct kobject *kobj,
 }
 
 
-static ssize_t radeon_show_edid2(struct kobject *kobj,
+static ssize_t radeon_show_edid2(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/slaves/w1_ds2431.c b/drivers/w1/slaves/w1_ds2431.c
index 2c6c0cf6a20f..84e2410aec1d 100644
--- a/drivers/w1/slaves/w1_ds2431.c
+++ b/drivers/w1/slaves/w1_ds2431.c
@@ -96,7 +96,7 @@ static int w1_f2d_readblock(struct w1_slave *sl, int off, int count, char *buf)
 	return -1;
 }
 
-static ssize_t w1_f2d_read_bin(struct kobject *kobj,
+static ssize_t w1_f2d_read_bin(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -202,7 +202,7 @@ retry:
 	return 0;
 }
 
-static ssize_t w1_f2d_write_bin(struct kobject *kobj,
+static ssize_t w1_f2d_write_bin(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c
index d2bf32118a98..0f7b8f9c509a 100644
--- a/drivers/w1/slaves/w1_ds2433.c
+++ b/drivers/w1/slaves/w1_ds2433.c
@@ -92,7 +92,7 @@ static int w1_f23_refresh_block(struct w1_slave *sl, struct w1_f23_data *data,
 }
 #endif	/* CONFIG_W1_SLAVE_DS2433_CRC */
 
-static ssize_t w1_f23_read_bin(struct kobject *kobj,
+static ssize_t w1_f23_read_bin(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -206,7 +206,7 @@ static int w1_f23_write(struct w1_slave *sl, int addr, int len, const u8 *data)
 	return 0;
 }
 
-static ssize_t w1_f23_write_bin(struct kobject *kobj,
+static ssize_t w1_f23_write_bin(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index 6e153343e117..483d45180911 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -97,7 +97,7 @@ int w1_ds2760_recall_eeprom(struct device *dev, int addr)
 	return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_RECALL_DATA);
 }
 
-static ssize_t w1_ds2760_read_bin(struct kobject *kobj,
+static ssize_t w1_ds2760_read_bin(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index ad5897dc4495..2839e281cd65 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -120,7 +120,7 @@ static struct device_attribute w1_slave_attr_id =
 
 /* Default family */
 
-static ssize_t w1_default_write(struct kobject *kobj,
+static ssize_t w1_default_write(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
@@ -139,7 +139,7 @@ out_up:
 	return count;
 }
 
-static ssize_t w1_default_read(struct kobject *kobj,
+static ssize_t w1_default_read(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c
index eb924e0a64ce..26f7184ef9e1 100644
--- a/drivers/zorro/zorro-sysfs.c
+++ b/drivers/zorro/zorro-sysfs.c
@@ -49,7 +49,7 @@ static ssize_t zorro_show_resource(struct device *dev, struct device_attribute *
 
 static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL);
 
-static ssize_t zorro_read_config(struct kobject *kobj,
+static ssize_t zorro_read_config(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 806b277453f9..4e321f7353fa 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -46,9 +46,9 @@ struct bin_buffer {
 };
 
 static int
-fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
+fill_read(struct file *file, char *buffer, loff_t off, size_t count)
 {
-	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
 	int rc;
@@ -59,7 +59,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 
 	rc = -EIO;
 	if (attr->read)
-		rc = attr->read(kobj, attr, buffer, off, count);
+		rc = attr->read(file, kobj, attr, buffer, off, count);
 
 	sysfs_put_active(attr_sd);
 
@@ -70,8 +70,7 @@ static ssize_t
 read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
 	struct bin_buffer *bb = file->private_data;
-	struct dentry *dentry = file->f_path.dentry;
-	int size = dentry->d_inode->i_size;
+	int size = file->f_path.dentry->d_inode->i_size;
 	loff_t offs = *off;
 	int count = min_t(size_t, bytes, PAGE_SIZE);
 	char *temp;
@@ -92,7 +91,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 
 	mutex_lock(&bb->mutex);
 
-	count = fill_read(dentry, bb->buffer, offs, count);
+	count = fill_read(file, bb->buffer, offs, count);
 	if (count < 0) {
 		mutex_unlock(&bb->mutex);
 		goto out_free;
@@ -117,9 +116,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 }
 
 static int
-flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
+flush_write(struct file *file, char *buffer, loff_t offset, size_t count)
 {
-	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
 	int rc;
@@ -130,7 +129,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 
 	rc = -EIO;
 	if (attr->write)
-		rc = attr->write(kobj, attr, buffer, offset, count);
+		rc = attr->write(file, kobj, attr, buffer, offset, count);
 
 	sysfs_put_active(attr_sd);
 
@@ -141,8 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 		     size_t bytes, loff_t *off)
 {
 	struct bin_buffer *bb = file->private_data;
-	struct dentry *dentry = file->f_path.dentry;
-	int size = dentry->d_inode->i_size;
+	int size = file->f_path.dentry->d_inode->i_size;
 	loff_t offs = *off;
 	int count = min_t(size_t, bytes, PAGE_SIZE);
 	char *temp;
@@ -165,7 +163,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 
 	memcpy(bb->buffer, temp, count);
 
-	count = flush_write(dentry, bb->buffer, offs, count);
+	count = flush_write(file, bb->buffer, offs, count);
 	mutex_unlock(&bb->mutex);
 
 	if (count > 0)
@@ -363,7 +361,7 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
 	if (!attr->mmap)
 		goto out_put;
 
-	rc = attr->mmap(kobj, attr, vma);
+	rc = attr->mmap(file, kobj, attr, vma);
 	if (rc)
 		goto out_put;
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 6903e9204032..f2694eb4dd3d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -87,17 +87,18 @@ struct attribute_group {
 
 #define attr_name(_attr) (_attr).attr.name
 
+struct file;
 struct vm_area_struct;
 
 struct bin_attribute {
 	struct attribute	attr;
 	size_t			size;
 	void			*private;
-	ssize_t (*read)(struct kobject *, struct bin_attribute *,
+	ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *,
 			char *, loff_t, size_t);
-	ssize_t (*write)(struct kobject *, struct bin_attribute *,
+	ssize_t (*write)(struct file *,struct kobject *, struct bin_attribute *,
 			 char *, loff_t, size_t);
-	int (*mmap)(struct kobject *, struct bin_attribute *attr,
+	int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr,
 		    struct vm_area_struct *vma);
 };
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 21fe3c426948..0b624e791805 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak));
 extern const void __stop_notes __attribute__((weak));
 #define	notes_size (&__stop_notes - &__start_notes)
 
-static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+static ssize_t notes_read(struct file *filp, struct kobject *kobj,
+			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
 {
 	memcpy(buf, &__start_notes + off, count);
diff --git a/kernel/module.c b/kernel/module.c
index e2564580f3f1..5e14483768bb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1182,7 +1182,7 @@ struct module_notes_attrs {
 	struct bin_attribute attrs[0];
 };
 
-static ssize_t module_notes_read(struct kobject *kobj,
+static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t pos, size_t count)
 {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index dd321e39e621..486b8f3861d2 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -659,7 +659,7 @@ static struct attribute_group bridge_group = {
  *
  * Returns the number of bytes read.
  */
-static ssize_t brforward_read(struct kobject *kobj,
+static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
-- 
cgit v1.2.3-58-ga151


From 608b4b9548dedf4185ca47edcaae4bff2ceb62de Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:45 -0700
Subject: netns: Teach network device kobjects which namespace they are in.

The problem.  Network devices show up in sysfs and with the network
namespace active multiple devices with the same name can show up in
the same directory, ouch!

To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.

By using the network namespace pointers as tags to separate out the
the sysfs directory entries we ensure that we don't have conflicts
in the directories and applications only see a limited set of
the network devices.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h |  1 +
 net/Kconfig             |  8 ++++++++
 net/core/net-sysfs.c    | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

(limited to 'net')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index b60d2dfe4e69..cf343a852534 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -142,6 +142,7 @@ extern const struct sysfs_ops kobj_sysfs_ops;
  */
 enum kobj_ns_type {
 	KOBJ_NS_TYPE_NONE = 0,
+	KOBJ_NS_TYPE_NET,
 	KOBJ_NS_TYPES
 };
 
diff --git a/net/Kconfig b/net/Kconfig
index 0d68b40fc0e6..f49532053a98 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,6 +45,14 @@ config COMPAT_NETLINK_MESSAGES
 
 menu "Networking options"
 
+config NET_NS
+	bool "Network namespace support"
+	default n
+	depends on EXPERIMENTAL && NAMESPACES
+	help
+	  Allow user space to create what appear to be multiple instances
+	  of the network stack.
+
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c57c4b228bb5..b388cdab9316 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -14,7 +14,9 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/slab.h>
+#include <linux/nsproxy.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
 #include <linux/vmalloc.h>
@@ -766,6 +768,38 @@ static void rx_queue_remove_kobjects(struct net_device *net)
 	kset_unregister(net->queues_kset);
 }
 #endif /* CONFIG_RPS */
+
+static const void *net_current_ns(void)
+{
+	return current->nsproxy->net_ns;
+}
+
+static const void *net_initial_ns(void)
+{
+	return &init_net;
+}
+
+static const void *net_netlink_ns(struct sock *sk)
+{
+	return sock_net(sk);
+}
+
+static struct kobj_ns_type_operations net_ns_type_operations = {
+	.type = KOBJ_NS_TYPE_NET,
+	.current_ns = net_current_ns,
+	.netlink_ns = net_netlink_ns,
+	.initial_ns = net_initial_ns,
+};
+
+static void net_kobj_ns_exit(struct net *net)
+{
+	kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+	.exit = net_kobj_ns_exit,
+};
+
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
@@ -806,6 +840,13 @@ static void netdev_release(struct device *d)
 	kfree((char *)dev - dev->padded);
 }
 
+static const void *net_namespace(struct device *d)
+{
+	struct net_device *dev;
+	dev = container_of(d, struct net_device, dev);
+	return dev_net(dev);
+}
+
 static struct class net_class = {
 	.name = "net",
 	.dev_release = netdev_release,
@@ -815,6 +856,8 @@ static struct class net_class = {
 #ifdef CONFIG_HOTPLUG
 	.dev_uevent = netdev_uevent,
 #endif
+	.ns_type = &net_ns_type_operations,
+	.namespace = net_namespace,
 };
 
 /* Delete sysfs entries but hold kobject reference until after all
@@ -904,5 +947,9 @@ void netdev_initialize_kobject(struct net_device *net)
 
 int netdev_kobject_init(void)
 {
+	kobj_ns_type_register(&net_ns_type_operations);
+#ifdef CONFIG_SYSFS
+	register_pernet_subsys(&sysfs_net_ops);
+#endif
 	return class_register(&net_class);
 }
-- 
cgit v1.2.3-58-ga151


From d6523ddf2376f39eaa89a4d68a33052d20c138b9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 16 May 2010 21:59:45 -0700
Subject: net/sysfs: Fix the bitrot in network device kobject namespace support

I had a couple of stupid bugs in:
netns: Teach network device kobjects which namespace they are in.

- I duplicated the Kconfig for the NET_NS
- The build was broken when sysfs was not compiled in

The sysfs breakage is because after I moved the operations
for the sysfs to the kobject layer, to make things cleaner
I forgot to move the ifdefs.  Opps.

I'm not quite certain how I got introduced a second NET_NS Kconfig,
but it was probably a 3 way merge somewhere along the way that
did not notice that the NET_NS Kconfig option had mvoed and thout
that was a bug.  It probably slipped in because it used to be the
sysfs patches were the first patches in my network namespace patches.
Some things just don't go like you would expect.

Neither of these bugs actually affect anything in the common case
but they should be fixed.

Thanks to Serge for noticing they were present.

Reported-by: Serge E. Hallyn <serue@us.ibm.com>
Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig          | 8 --------
 net/core/net-sysfs.c | 8 +++-----
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index f49532053a98..0d68b40fc0e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,14 +45,6 @@ config COMPAT_NETLINK_MESSAGES
 
 menu "Networking options"
 
-config NET_NS
-	bool "Network namespace support"
-	default n
-	depends on EXPERIMENTAL && NAMESPACES
-	help
-	  Allow user space to create what appear to be multiple instances
-	  of the network stack.
-
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b388cdab9316..6881e65944c8 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -469,6 +469,7 @@ static struct attribute_group wireless_group = {
 	.attrs = wireless_attrs,
 };
 #endif
+#endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_RPS
 /*
@@ -796,11 +797,10 @@ static void net_kobj_ns_exit(struct net *net)
 	kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
 }
 
-static struct pernet_operations sysfs_net_ops = {
+static struct pernet_operations kobj_net_ops = {
 	.exit = net_kobj_ns_exit,
 };
 
-#endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
 static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
@@ -948,8 +948,6 @@ void netdev_initialize_kobject(struct net_device *net)
 int netdev_kobject_init(void)
 {
 	kobj_ns_type_register(&net_ns_type_operations);
-#ifdef CONFIG_SYSFS
-	register_pernet_subsys(&sysfs_net_ops);
-#endif
+	register_pernet_subsys(&kobj_net_ops);
 	return class_register(&net_class);
 }
-- 
cgit v1.2.3-58-ga151


From 910a7e905f36e51a17d6e8bb4ad6dcd5ac5f1d53 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:46 -0700
Subject: netlink: Implment netlink_broadcast_filtered

When netlink sockets are used to convey data that is in a namespace
we need a way to select a subset of the listening sockets to deliver
the packet to.  For the network namespace we have been doing this
by only transmitting packets in the correct network namespace.

For data belonging to other namespaces netlink_bradcast_filtered
provides a mechanism that allows us to examine the destination
socket and to decide if we should transmit the specified packet
to it.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/netlink.h  |  4 ++++
 net/netlink/af_netlink.c | 21 +++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6eaca5e1e8ca..59d066936ab9 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -188,6 +188,10 @@ extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
 			     __u32 group, gfp_t allocation);
+extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+	__u32 pid, __u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data);
 extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
 extern int netlink_register_notifier(struct notifier_block *nb);
 extern int netlink_unregister_notifier(struct notifier_block *nb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6464a1972a69..a2eb965207d3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -978,6 +978,8 @@ struct netlink_broadcast_data {
 	int delivered;
 	gfp_t allocation;
 	struct sk_buff *skb, *skb2;
+	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+	void *tx_data;
 };
 
 static inline int do_one_broadcast(struct sock *sk,
@@ -1020,6 +1022,9 @@ static inline int do_one_broadcast(struct sock *sk,
 		p->failure = 1;
 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
 			p->delivery_failure = 1;
+	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+		kfree_skb(p->skb2);
+		p->skb2 = NULL;
 	} else if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
@@ -1038,8 +1043,10 @@ out:
 	return 0;
 }
 
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-		      u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+	u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data)
 {
 	struct net *net = sock_net(ssk);
 	struct netlink_broadcast_data info;
@@ -1059,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	info.allocation = allocation;
 	info.skb = skb;
 	info.skb2 = NULL;
+	info.tx_filter = filter;
+	info.tx_data = filter_data;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -1083,6 +1092,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	}
 	return -ESRCH;
 }
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation)
+{
+	return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+		NULL, NULL);
+}
 EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {
-- 
cgit v1.2.3-58-ga151


From a1b3f594dc5faab91d3a218c7019e9b5edd9fe1a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:49 -0700
Subject: net: Expose all network devices in a namespaces in sysfs

This reverts commit aaf8cdc34ddba08122f02217d9d684e2f9f5d575.

Drivers like the ipw2100 call device_create_group when they
are initialized and device_remove_group when they are shutdown.
Moving them between namespaces deletes their sysfs groups early.

In particular the following call chain results.
netdev_unregister_kobject -> device_del -> kobject_del -> sysfs_remove_dir
With sysfs_remove_dir recursively deleting all of it's subdirectories,
and nothing adding them back.

Ouch!

Therefore we need to call something that ultimate calls sysfs_mv_dir
as that sysfs function can move sysfs directories between namespaces
without deleting their subdirectories or their contents.   Allowing
us to avoid placing extra boiler plate into every driver that does
something interesting with sysfs.

Currently the function that provides that capability is device_rename.
That is the code works without nasty side effects as originally written.

So remove the misguided fix for moving devices between namespaces.  The
bug in the kobject layer that inspired it has now been recognized and
fixed.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 net/core/dev.c       | 28 +++++-----------------------
 net/core/net-sysfs.c | 16 +---------------
 net/core/net-sysfs.h |  1 -
 3 files changed, 6 insertions(+), 39 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 6c820650b80f..d273e4e3ecdc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1002,15 +1002,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
 		return err;
 
 rollback:
-	/* For now only devices in the initial network namespace
-	 * are in sysfs.
-	 */
-	if (net_eq(net, &init_net)) {
-		ret = device_rename(&dev->dev, dev->name);
-		if (ret) {
-			memcpy(dev->name, oldname, IFNAMSIZ);
-			return ret;
-		}
+	ret = device_rename(&dev->dev, dev->name);
+	if (ret) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return ret;
 	}
 
 	write_lock_bh(&dev_base_lock);
@@ -4994,8 +4989,6 @@ int register_netdevice(struct net_device *dev)
 	if (dev->features & NETIF_F_SG)
 		dev->features |= NETIF_F_GSO;
 
-	netdev_initialize_kobject(dev);
-
 	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
 	ret = notifier_to_errno(ret);
 	if (ret)
@@ -5547,15 +5540,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	if (dev->features & NETIF_F_NETNS_LOCAL)
 		goto out;
 
-#ifdef CONFIG_SYSFS
-	/* Don't allow real devices to be moved when sysfs
-	 * is enabled.
-	 */
-	err = -EINVAL;
-	if (dev->dev.parent)
-		goto out;
-#endif
-
 	/* Ensure the device has been registrered */
 	err = -EINVAL;
 	if (dev->reg_state != NETREG_REGISTERED)
@@ -5606,8 +5590,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	dev_uc_flush(dev);
 	dev_mc_flush(dev);
 
-	netdev_unregister_kobject(dev);
-
 	/* Actually switch the network namespace */
 	dev_net_set(dev, net);
 
@@ -5620,7 +5602,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	}
 
 	/* Fixup kobjects */
-	err = netdev_register_kobject(dev);
+	err = device_rename(&dev->dev, dev->name);
 	WARN_ON(err);
 
 	/* Add the device back in the hashes */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 6881e65944c8..99e7052d7323 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -808,9 +808,6 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
 	struct net_device *dev = to_net_dev(d);
 	int retval;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return 0;
-
 	/* pass interface to uevent. */
 	retval = add_uevent_var(env, "INTERFACE=%s", dev->name);
 	if (retval)
@@ -869,9 +866,6 @@ void netdev_unregister_kobject(struct net_device * net)
 
 	kobject_get(&dev->kobj);
 
-	if (!net_eq(dev_net(net), &init_net))
-		return;
-
 #ifdef CONFIG_RPS
 	rx_queue_remove_kobjects(net);
 #endif
@@ -886,6 +880,7 @@ int netdev_register_kobject(struct net_device *net)
 	const struct attribute_group **groups = net->sysfs_groups;
 	int error = 0;
 
+	device_initialize(dev);
 	dev->class = &net_class;
 	dev->platform_data = net;
 	dev->groups = groups;
@@ -908,9 +903,6 @@ int netdev_register_kobject(struct net_device *net)
 #endif
 #endif /* CONFIG_SYSFS */
 
-	if (!net_eq(dev_net(net), &init_net))
-		return 0;
-
 	error = device_add(dev);
 	if (error)
 		return error;
@@ -939,12 +931,6 @@ void netdev_class_remove_file(struct class_attribute *class_attr)
 EXPORT_SYMBOL(netdev_class_create_file);
 EXPORT_SYMBOL(netdev_class_remove_file);
 
-void netdev_initialize_kobject(struct net_device *net)
-{
-	struct device *device = &(net->dev);
-	device_initialize(device);
-}
-
 int netdev_kobject_init(void)
 {
 	kobj_ns_type_register(&net_ns_type_operations);
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 14e7524260b3..805555e8b187 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,5 +4,4 @@
 int netdev_kobject_init(void);
 int netdev_register_kobject(struct net_device *);
 void netdev_unregister_kobject(struct net_device *);
-void netdev_initialize_kobject(struct net_device *);
 #endif
-- 
cgit v1.2.3-58-ga151


From 35f3d14dbbc58447c61e38a162ea10add6b31dc7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Thu, 20 May 2010 10:43:18 +0200
Subject: pipe: add support for shrinking and growing pipes

This patch adds F_GETPIPE_SZ and F_SETPIPE_SZ fcntl() actions for
growing and shrinking the size of a pipe and adjusts pipe.c and splice.c
(and relay and network splice) usage to work with these larger (or smaller)
pipes.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/fcntl.c                |   5 ++
 fs/pipe.c                 | 107 ++++++++++++++++++++++++++++----
 fs/splice.c               | 151 ++++++++++++++++++++++++++++++++--------------
 include/linux/fcntl.h     |   6 ++
 include/linux/pipe_fs_i.h |  11 ++--
 include/linux/splice.h    |   7 +++
 kernel/relay.c            |  15 +++--
 kernel/trace/trace.c      |  60 ++++++++++--------
 net/core/skbuff.c         |  38 ++++++------
 9 files changed, 292 insertions(+), 108 deletions(-)

(limited to 'net')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 452d02f9075e..bcba960328fa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -14,6 +14,7 @@
 #include <linux/dnotify.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/pipe_fs_i.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
 #include <linux/signal.h>
@@ -412,6 +413,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	case F_NOTIFY:
 		err = fcntl_dirnotify(fd, filp, arg);
 		break;
+	case F_SETPIPE_SZ:
+	case F_GETPIPE_SZ:
+		err = pipe_fcntl(filp, cmd, arg);
+		break;
 	default:
 		break;
 	}
diff --git a/fs/pipe.c b/fs/pipe.c
index 37ba29ff3158..054b8a6a2c7a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/log2.h>
 #include <linux/mount.h>
 #include <linux/pipe_fs_i.h>
 #include <linux/uio.h>
@@ -390,7 +391,7 @@ redo:
 			if (!buf->len) {
 				buf->ops = NULL;
 				ops->release(pipe, buf);
-				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
+				curbuf = (curbuf + 1) & (pipe->buffers - 1);
 				pipe->curbuf = curbuf;
 				pipe->nrbufs = --bufs;
 				do_wakeup = 1;
@@ -472,7 +473,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
 	if (pipe->nrbufs && chars != 0) {
 		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
-							(PIPE_BUFFERS-1);
+							(pipe->buffers - 1);
 		struct pipe_buffer *buf = pipe->bufs + lastbuf;
 		const struct pipe_buf_operations *ops = buf->ops;
 		int offset = buf->offset + buf->len;
@@ -518,8 +519,8 @@ redo1:
 			break;
 		}
 		bufs = pipe->nrbufs;
-		if (bufs < PIPE_BUFFERS) {
-			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
+		if (bufs < pipe->buffers) {
+			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
 			char *src;
@@ -580,7 +581,7 @@ redo2:
 			if (!total_len)
 				break;
 		}
-		if (bufs < PIPE_BUFFERS)
+		if (bufs < pipe->buffers)
 			continue;
 		if (filp->f_flags & O_NONBLOCK) {
 			if (!ret)
@@ -640,7 +641,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			nrbufs = pipe->nrbufs;
 			while (--nrbufs >= 0) {
 				count += pipe->bufs[buf].len;
-				buf = (buf+1) & (PIPE_BUFFERS-1);
+				buf = (buf+1) & (pipe->buffers - 1);
 			}
 			mutex_unlock(&inode->i_mutex);
 
@@ -671,7 +672,7 @@ pipe_poll(struct file *filp, poll_table *wait)
 	}
 
 	if (filp->f_mode & FMODE_WRITE) {
-		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
+		mask |= (nrbufs < pipe->buffers) ? POLLOUT | POLLWRNORM : 0;
 		/*
 		 * Most Unices do not set POLLERR for FIFOs but on Linux they
 		 * behave exactly like pipes for poll().
@@ -877,25 +878,32 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
 
 	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 	if (pipe) {
-		init_waitqueue_head(&pipe->wait);
-		pipe->r_counter = pipe->w_counter = 1;
-		pipe->inode = inode;
+		pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+		if (pipe->bufs) {
+			init_waitqueue_head(&pipe->wait);
+			pipe->r_counter = pipe->w_counter = 1;
+			pipe->inode = inode;
+			pipe->buffers = PIPE_DEF_BUFFERS;
+			return pipe;
+		}
+		kfree(pipe);
 	}
 
-	return pipe;
+	return NULL;
 }
 
 void __free_pipe_info(struct pipe_inode_info *pipe)
 {
 	int i;
 
-	for (i = 0; i < PIPE_BUFFERS; i++) {
+	for (i = 0; i < pipe->buffers; i++) {
 		struct pipe_buffer *buf = pipe->bufs + i;
 		if (buf->ops)
 			buf->ops->release(pipe, buf);
 	}
 	if (pipe->tmp_page)
 		__free_page(pipe->tmp_page);
+	kfree(pipe->bufs);
 	kfree(pipe);
 }
 
@@ -1093,6 +1101,81 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
 	return sys_pipe2(fildes, 0);
 }
 
+/*
+ * Allocate a new array of pipe buffers and copy the info over. Returns the
+ * pipe size if successful, or return -ERROR on error.
+ */
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+{
+	struct pipe_buffer *bufs;
+
+	/*
+	 * Must be a power-of-2 currently
+	 */
+	if (!is_power_of_2(arg))
+		return -EINVAL;
+
+	/*
+	 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
+	 * expect a lot of shrink+grow operations, just free and allocate
+	 * again like we would do for growing. If the pipe currently
+	 * contains more buffers than arg, then return busy.
+	 */
+	if (arg < pipe->nrbufs)
+		return -EBUSY;
+
+	bufs = kcalloc(arg, sizeof(struct pipe_buffer), GFP_KERNEL);
+	if (unlikely(!bufs))
+		return -ENOMEM;
+
+	/*
+	 * The pipe array wraps around, so just start the new one at zero
+	 * and adjust the indexes.
+	 */
+	if (pipe->nrbufs) {
+		const unsigned int tail = pipe->nrbufs & (pipe->buffers - 1);
+		const unsigned int head = pipe->nrbufs - tail;
+
+		if (head)
+			memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
+		if (tail)
+			memcpy(bufs + head, pipe->bufs + pipe->curbuf, tail * sizeof(struct pipe_buffer));
+	}
+
+	pipe->curbuf = 0;
+	kfree(pipe->bufs);
+	pipe->bufs = bufs;
+	pipe->buffers = arg;
+	return arg;
+}
+
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct pipe_inode_info *pipe;
+	long ret;
+
+	pipe = file->f_path.dentry->d_inode->i_pipe;
+	if (!pipe)
+		return -EBADF;
+
+	mutex_lock(&pipe->inode->i_mutex);
+
+	switch (cmd) {
+	case F_SETPIPE_SZ:
+		ret = pipe_set_size(pipe, arg);
+		break;
+	case F_GETPIPE_SZ:
+		ret = pipe->buffers;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	mutex_unlock(&pipe->inode->i_mutex);
+	return ret;
+}
+
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
  * no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/fs/splice.c b/fs/splice.c
index 9313b6124a2e..ac22b00d86c3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -193,8 +193,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 			break;
 		}
 
-		if (pipe->nrbufs < PIPE_BUFFERS) {
-			int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
+		if (pipe->nrbufs < pipe->buffers) {
+			int newbuf = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 
 			buf->page = spd->pages[page_nr];
@@ -214,7 +214,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 
 			if (!--spd->nr_pages)
 				break;
-			if (pipe->nrbufs < PIPE_BUFFERS)
+			if (pipe->nrbufs < pipe->buffers)
 				continue;
 
 			break;
@@ -265,6 +265,36 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
 	page_cache_release(spd->pages[i]);
 }
 
+/*
+ * Check if we need to grow the arrays holding pages and partial page
+ * descriptions.
+ */
+int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+{
+	if (pipe->buffers <= PIPE_DEF_BUFFERS)
+		return 0;
+
+	spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
+	spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
+
+	if (spd->pages && spd->partial)
+		return 0;
+
+	kfree(spd->pages);
+	kfree(spd->partial);
+	return -ENOMEM;
+}
+
+void splice_shrink_spd(struct pipe_inode_info *pipe,
+		       struct splice_pipe_desc *spd)
+{
+	if (pipe->buffers <= PIPE_DEF_BUFFERS)
+		return;
+
+	kfree(spd->pages);
+	kfree(spd->partial);
+}
+
 static int
 __generic_file_splice_read(struct file *in, loff_t *ppos,
 			   struct pipe_inode_info *pipe, size_t len,
@@ -272,8 +302,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 {
 	struct address_space *mapping = in->f_mapping;
 	unsigned int loff, nr_pages, req_pages;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct page *page;
 	pgoff_t index, end_index;
 	loff_t isize;
@@ -286,15 +316,18 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		.spd_release = spd_release_page,
 	};
 
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	loff = *ppos & ~PAGE_CACHE_MASK;
 	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS);
+	nr_pages = min(req_pages, pipe->buffers);
 
 	/*
 	 * Lookup the (hopefully) full range of pages we need.
 	 */
-	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
+	spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, spd.pages);
 	index += spd.nr_pages;
 
 	/*
@@ -335,7 +368,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 			unlock_page(page);
 		}
 
-		pages[spd.nr_pages++] = page;
+		spd.pages[spd.nr_pages++] = page;
 		index++;
 	}
 
@@ -356,7 +389,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 		 * this_len is the max we'll use from this page
 		 */
 		this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
-		page = pages[page_nr];
+		page = spd.pages[page_nr];
 
 		if (PageReadahead(page))
 			page_cache_async_readahead(mapping, &in->f_ra, in,
@@ -393,8 +426,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 					error = -ENOMEM;
 					break;
 				}
-				page_cache_release(pages[page_nr]);
-				pages[page_nr] = page;
+				page_cache_release(spd.pages[page_nr]);
+				spd.pages[page_nr] = page;
 			}
 			/*
 			 * page was already under io and is now done, great
@@ -451,8 +484,8 @@ fill_it:
 			len = this_len;
 		}
 
-		partial[page_nr].offset = loff;
-		partial[page_nr].len = this_len;
+		spd.partial[page_nr].offset = loff;
+		spd.partial[page_nr].len = this_len;
 		len -= this_len;
 		loff = 0;
 		spd.nr_pages++;
@@ -464,12 +497,13 @@ fill_it:
 	 * we got, 'nr_pages' is how many pages are in the map.
 	 */
 	while (page_nr < nr_pages)
-		page_cache_release(pages[page_nr++]);
+		page_cache_release(spd.pages[page_nr++]);
 	in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
 
 	if (spd.nr_pages)
-		return splice_to_pipe(pipe, &spd);
+		error = splice_to_pipe(pipe, &spd);
 
+	splice_shrink_spd(pipe, &spd);
 	return error;
 }
 
@@ -560,9 +594,9 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	unsigned int nr_pages;
 	unsigned int nr_freed;
 	size_t offset;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
-	struct iovec vec[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
 	pgoff_t index;
 	ssize_t res;
 	size_t this_len;
@@ -576,11 +610,22 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 		.spd_release = spd_release_page,
 	};
 
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
+	res = -ENOMEM;
+	vec = __vec;
+	if (pipe->buffers > PIPE_DEF_BUFFERS) {
+		vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
+		if (!vec)
+			goto shrink_ret;
+	}
+
 	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-	for (i = 0; i < nr_pages && i < PIPE_BUFFERS && len; i++) {
+	for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
 		struct page *page;
 
 		page = alloc_page(GFP_USER);
@@ -591,7 +636,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 		this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
 		vec[i].iov_base = (void __user *) page_address(page);
 		vec[i].iov_len = this_len;
-		pages[i] = page;
+		spd.pages[i] = page;
 		spd.nr_pages++;
 		len -= this_len;
 		offset = 0;
@@ -610,11 +655,11 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	nr_freed = 0;
 	for (i = 0; i < spd.nr_pages; i++) {
 		this_len = min_t(size_t, vec[i].iov_len, res);
-		partial[i].offset = 0;
-		partial[i].len = this_len;
+		spd.partial[i].offset = 0;
+		spd.partial[i].len = this_len;
 		if (!this_len) {
-			__free_page(pages[i]);
-			pages[i] = NULL;
+			__free_page(spd.pages[i]);
+			spd.pages[i] = NULL;
 			nr_freed++;
 		}
 		res -= this_len;
@@ -625,13 +670,18 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	if (res > 0)
 		*ppos += res;
 
+shrink_ret:
+	if (vec != __vec)
+		kfree(vec);
+	splice_shrink_spd(pipe, &spd);
 	return res;
 
 err:
 	for (i = 0; i < spd.nr_pages; i++)
-		__free_page(pages[i]);
+		__free_page(spd.pages[i]);
 
-	return error;
+	res = error;
+	goto shrink_ret;
 }
 EXPORT_SYMBOL(default_file_splice_read);
 
@@ -784,7 +834,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 		if (!buf->len) {
 			buf->ops = NULL;
 			ops->release(pipe, buf);
-			pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
+			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
 			if (pipe->inode)
 				sd->need_wakeup = true;
@@ -1211,7 +1261,7 @@ out_release:
 	 * If we did an incomplete transfer we must release
 	 * the pipe buffers in question:
 	 */
-	for (i = 0; i < PIPE_BUFFERS; i++) {
+	for (i = 0; i < pipe->buffers; i++) {
 		struct pipe_buffer *buf = pipe->bufs + i;
 
 		if (buf->ops) {
@@ -1371,7 +1421,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
  */
 static int get_iovec_page_array(const struct iovec __user *iov,
 				unsigned int nr_vecs, struct page **pages,
-				struct partial_page *partial, int aligned)
+				struct partial_page *partial, int aligned,
+				unsigned int pipe_buffers)
 {
 	int buffers = 0, error = 0;
 
@@ -1414,8 +1465,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 			break;
 
 		npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		if (npages > PIPE_BUFFERS - buffers)
-			npages = PIPE_BUFFERS - buffers;
+		if (npages > pipe_buffers - buffers)
+			npages = pipe_buffers - buffers;
 
 		error = get_user_pages_fast((unsigned long)base, npages,
 					0, &pages[buffers]);
@@ -1450,7 +1501,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
 		 * or if we mapped the max number of pages that we have
 		 * room for.
 		 */
-		if (error < npages || buffers == PIPE_BUFFERS)
+		if (error < npages || buffers == pipe_buffers)
 			break;
 
 		nr_vecs--;
@@ -1593,8 +1644,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
 			     unsigned long nr_segs, unsigned int flags)
 {
 	struct pipe_inode_info *pipe;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -1602,17 +1653,25 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
 		.ops = &user_page_pipe_buf_ops,
 		.spd_release = spd_release_page,
 	};
+	long ret;
 
 	pipe = pipe_info(file->f_path.dentry->d_inode);
 	if (!pipe)
 		return -EBADF;
 
-	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
-					    flags & SPLICE_F_GIFT);
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
+	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
+					    spd.partial, flags & SPLICE_F_GIFT,
+					    pipe->buffers);
 	if (spd.nr_pages <= 0)
-		return spd.nr_pages;
+		ret = spd.nr_pages;
+	else
+		ret = splice_to_pipe(pipe, &spd);
 
-	return splice_to_pipe(pipe, &spd);
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }
 
 /*
@@ -1738,13 +1797,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 	 * Check ->nrbufs without the inode lock first. This function
 	 * is speculative anyways, so missing one is ok.
 	 */
-	if (pipe->nrbufs < PIPE_BUFFERS)
+	if (pipe->nrbufs < pipe->buffers)
 		return 0;
 
 	ret = 0;
 	pipe_lock(pipe);
 
-	while (pipe->nrbufs >= PIPE_BUFFERS) {
+	while (pipe->nrbufs >= pipe->buffers) {
 		if (!pipe->readers) {
 			send_sig(SIGPIPE, current, 0);
 			ret = -EPIPE;
@@ -1810,7 +1869,7 @@ retry:
 		 * Cannot make any progress, because either the input
 		 * pipe is empty or the output pipe is full.
 		 */
-		if (!ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS) {
+		if (!ipipe->nrbufs || opipe->nrbufs >= opipe->buffers) {
 			/* Already processed some buffers, break */
 			if (ret)
 				break;
@@ -1831,7 +1890,7 @@ retry:
 		}
 
 		ibuf = ipipe->bufs + ipipe->curbuf;
-		nbuf = (opipe->curbuf + opipe->nrbufs) % PIPE_BUFFERS;
+		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
 		obuf = opipe->bufs + nbuf;
 
 		if (len >= ibuf->len) {
@@ -1841,7 +1900,7 @@ retry:
 			*obuf = *ibuf;
 			ibuf->ops = NULL;
 			opipe->nrbufs++;
-			ipipe->curbuf = (ipipe->curbuf + 1) % PIPE_BUFFERS;
+			ipipe->curbuf = (ipipe->curbuf + 1) & (ipipe->buffers - 1);
 			ipipe->nrbufs--;
 			input_wakeup = true;
 		} else {
@@ -1914,11 +1973,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		 * If we have iterated all input buffers or ran out of
 		 * output room, break.
 		 */
-		if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+		if (i >= ipipe->nrbufs || opipe->nrbufs >= opipe->buffers)
 			break;
 
-		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
-		nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
+		ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (ipipe->buffers-1));
+		nbuf = (opipe->curbuf + opipe->nrbufs) & (opipe->buffers - 1);
 
 		/*
 		 * Get a reference to this pipe buffer,
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 86037400a6e3..afc00af3229b 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -21,6 +21,12 @@
  */
 #define F_NOTIFY	(F_LINUX_SPECIFIC_BASE+2)
 
+/*
+ * Set and get of pipe page size array
+ */
+#define F_SETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ	(F_LINUX_SPECIFIC_BASE + 8)
+
 /*
  * Types of directory notifications that may be requested.
  */
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index b43a9e039059..65f4282fcbaf 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -3,7 +3,7 @@
 
 #define PIPEFS_MAGIC 0x50495045
 
-#define PIPE_BUFFERS (16)
+#define PIPE_DEF_BUFFERS	16
 
 #define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
 #define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
@@ -44,17 +44,17 @@ struct pipe_buffer {
  **/
 struct pipe_inode_info {
 	wait_queue_head_t wait;
-	unsigned int nrbufs, curbuf;
-	struct page *tmp_page;
+	unsigned int nrbufs, curbuf, buffers;
 	unsigned int readers;
 	unsigned int writers;
 	unsigned int waiting_writers;
 	unsigned int r_counter;
 	unsigned int w_counter;
+	struct page *tmp_page;
 	struct fasync_struct *fasync_readers;
 	struct fasync_struct *fasync_writers;
 	struct inode *inode;
-	struct pipe_buffer bufs[PIPE_BUFFERS];
+	struct pipe_buffer *bufs;
 };
 
 /*
@@ -154,4 +154,7 @@ int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
 
+/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
+long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+
 #endif
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 18e7c7c0cae6..997c3b4c212b 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -82,4 +82,11 @@ extern ssize_t splice_to_pipe(struct pipe_inode_info *,
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
 				      splice_direct_actor *);
 
+/*
+ * for dynamic pipe sizing
+ */
+extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
+extern void splice_shrink_spd(struct pipe_inode_info *,
+				struct splice_pipe_desc *);
+
 #endif
diff --git a/kernel/relay.c b/kernel/relay.c
index 3d97f2821611..4268287148c1 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1231,8 +1231,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
 	size_t read_subbuf = read_start / subbuf_size;
 	size_t padding = rbuf->padding[read_subbuf];
 	size_t nonpad_end = read_subbuf * subbuf_size + subbuf_size - padding;
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.nr_pages = 0,
@@ -1245,6 +1245,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
 
 	if (rbuf->subbufs_produced == rbuf->subbufs_consumed)
 		return 0;
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
 
 	/*
 	 * Adjust read len, if longer than what is available
@@ -1255,7 +1257,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
 	subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
 	pidx = (read_start / PAGE_SIZE) % subbuf_pages;
 	poff = read_start & ~PAGE_MASK;
-	nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS);
+	nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers);
 
 	for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
 		unsigned int this_len, this_end, private;
@@ -1289,16 +1291,19 @@ static ssize_t subbuf_splice_actor(struct file *in,
 		}
 	}
 
+	ret = 0;
 	if (!spd.nr_pages)
-		return 0;
+		goto out;
 
 	ret = *nonpad_ret = splice_to_pipe(pipe, &spd);
 	if (ret < 0 || ret < total_len)
-		return ret;
+		goto out;
 
         if (read_start + ret == nonpad_end)
                 ret += padding;
 
+out:
+	splice_shrink_spd(pipe, &spd);
         return ret;
 }
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 44f916a04065..7b155a0e6f31 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3269,12 +3269,12 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 					size_t len,
 					unsigned int flags)
 {
-	struct page *pages[PIPE_BUFFERS];
-	struct partial_page partial[PIPE_BUFFERS];
+	struct page *pages_def[PIPE_DEF_BUFFERS];
+	struct partial_page partial_def[PIPE_DEF_BUFFERS];
 	struct trace_iterator *iter = filp->private_data;
 	struct splice_pipe_desc spd = {
-		.pages		= pages,
-		.partial	= partial,
+		.pages		= pages_def,
+		.partial	= partial_def,
 		.nr_pages	= 0, /* This gets updated below. */
 		.flags		= flags,
 		.ops		= &tracing_pipe_buf_ops,
@@ -3285,6 +3285,9 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 	size_t rem;
 	unsigned int i;
 
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
 	/* copy the tracer to avoid using a global lock all around */
 	mutex_lock(&trace_types_lock);
 	if (unlikely(old_tracer != current_trace && current_trace)) {
@@ -3315,23 +3318,23 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 	trace_access_lock(iter->cpu_file);
 
 	/* Fill as many pages as possible. */
-	for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
-		pages[i] = alloc_page(GFP_KERNEL);
-		if (!pages[i])
+	for (i = 0, rem = len; i < pipe->buffers && rem; i++) {
+		spd.pages[i] = alloc_page(GFP_KERNEL);
+		if (!spd.pages[i])
 			break;
 
 		rem = tracing_fill_pipe_page(rem, iter);
 
 		/* Copy the data into the page, so we can start over. */
 		ret = trace_seq_to_buffer(&iter->seq,
-					  page_address(pages[i]),
+					  page_address(spd.pages[i]),
 					  iter->seq.len);
 		if (ret < 0) {
-			__free_page(pages[i]);
+			__free_page(spd.pages[i]);
 			break;
 		}
-		partial[i].offset = 0;
-		partial[i].len = iter->seq.len;
+		spd.partial[i].offset = 0;
+		spd.partial[i].len = iter->seq.len;
 
 		trace_seq_init(&iter->seq);
 	}
@@ -3342,12 +3345,14 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
 
 	spd.nr_pages = i;
 
-	return splice_to_pipe(pipe, &spd);
+	ret = splice_to_pipe(pipe, &spd);
+out:
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 
 out_err:
 	mutex_unlock(&iter->mutex);
-
-	return ret;
+	goto out;
 }
 
 static ssize_t
@@ -3746,11 +3751,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			    unsigned int flags)
 {
 	struct ftrace_buffer_info *info = file->private_data;
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial_def[PIPE_DEF_BUFFERS];
+	struct page *pages_def[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
-		.pages		= pages,
-		.partial	= partial,
+		.pages		= pages_def,
+		.partial	= partial_def,
 		.flags		= flags,
 		.ops		= &buffer_pipe_buf_ops,
 		.spd_release	= buffer_spd_release,
@@ -3759,22 +3764,28 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 	int entries, size, i;
 	size_t ret;
 
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
 	if (*ppos & (PAGE_SIZE - 1)) {
 		WARN_ONCE(1, "Ftrace: previous read must page-align\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	if (len & (PAGE_SIZE - 1)) {
 		WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
-		if (len < PAGE_SIZE)
-			return -EINVAL;
+		if (len < PAGE_SIZE) {
+			ret = -EINVAL;
+			goto out;
+		}
 		len &= PAGE_MASK;
 	}
 
 	trace_access_lock(info->cpu);
 	entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
 
-	for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
+	for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) {
 		struct page *page;
 		int r;
 
@@ -3829,11 +3840,12 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		else
 			ret = 0;
 		/* TODO: block */
-		return ret;
+		goto out;
 	}
 
 	ret = splice_to_pipe(pipe, &spd);
-
+	splice_shrink_spd(pipe, &spd);
+out:
 	return ret;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..931981774b1a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1417,12 +1417,13 @@ new_page:
 /*
  * Fill page/offset/length into spd, if it can hold more pages.
  */
-static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
+static inline int spd_fill_page(struct splice_pipe_desc *spd,
+				struct pipe_inode_info *pipe, struct page *page,
 				unsigned int *len, unsigned int offset,
 				struct sk_buff *skb, int linear,
 				struct sock *sk)
 {
-	if (unlikely(spd->nr_pages == PIPE_BUFFERS))
+	if (unlikely(spd->nr_pages == pipe->buffers))
 		return 1;
 
 	if (linear) {
@@ -1458,7 +1459,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 				   unsigned int plen, unsigned int *off,
 				   unsigned int *len, struct sk_buff *skb,
 				   struct splice_pipe_desc *spd, int linear,
-				   struct sock *sk)
+				   struct sock *sk,
+				   struct pipe_inode_info *pipe)
 {
 	if (!*len)
 		return 1;
@@ -1481,7 +1483,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
 		/* the linear region may spread across several pages  */
 		flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-		if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk))
+		if (spd_fill_page(spd, pipe, page, &flen, poff, skb, linear, sk))
 			return 1;
 
 		__segment_seek(&page, &poff, &plen, flen);
@@ -1496,9 +1498,9 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
  * Map linear and fragment data from the skb to spd. It reports failure if the
  * pipe is full or if we already spliced the requested length.
  */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *len, struct splice_pipe_desc *spd,
-			     struct sock *sk)
+static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
+			     unsigned int *offset, unsigned int *len,
+			     struct splice_pipe_desc *spd, struct sock *sk)
 {
 	int seg;
 
@@ -1508,7 +1510,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 	if (__splice_segment(virt_to_page(skb->data),
 			     (unsigned long) skb->data & (PAGE_SIZE - 1),
 			     skb_headlen(skb),
-			     offset, len, skb, spd, 1, sk))
+			     offset, len, skb, spd, 1, sk, pipe))
 		return 1;
 
 	/*
@@ -1518,7 +1520,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
 		if (__splice_segment(f->page, f->page_offset, f->size,
-				     offset, len, skb, spd, 0, sk))
+				     offset, len, skb, spd, 0, sk, pipe))
 			return 1;
 	}
 
@@ -1535,8 +1537,8 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
 		    unsigned int flags)
 {
-	struct partial_page partial[PIPE_BUFFERS];
-	struct page *pages[PIPE_BUFFERS];
+	struct partial_page partial[PIPE_DEF_BUFFERS];
+	struct page *pages[PIPE_DEF_BUFFERS];
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
@@ -1546,12 +1548,16 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	};
 	struct sk_buff *frag_iter;
 	struct sock *sk = skb->sk;
+	int ret = 0;
+
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
 
 	/*
 	 * __skb_splice_bits() only fails if the output has no room left,
 	 * so no point in going over the frag_list for the error case.
 	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk))
+	if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
 		goto done;
 	else if (!tlen)
 		goto done;
@@ -1562,14 +1568,12 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
 	skb_walk_frags(skb, frag_iter) {
 		if (!tlen)
 			break;
-		if (__skb_splice_bits(frag_iter, &offset, &tlen, &spd, sk))
+		if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
 			break;
 	}
 
 done:
 	if (spd.nr_pages) {
-		int ret;
-
 		/*
 		 * Drop the socket lock, otherwise we have reverse
 		 * locking dependencies between sk_lock and i_mutex
@@ -1582,10 +1586,10 @@ done:
 		release_sock(sk);
 		ret = splice_to_pipe(pipe, &spd);
 		lock_sock(sk);
-		return ret;
 	}
 
-	return 0;
+	splice_shrink_spd(pipe, &spd);
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3-58-ga151


From c56e4acf55c804cbeea0ddb696ef698c73d39826 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Thu, 25 Mar 2010 12:40:35 +0000
Subject: 9p: VFS switches for 9p2000.L: protocol and client changes

Prepare p9pdu_read/write functions to handle multiple protocols.

Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 net/9p/client.c   | 3 ++-
 net/9p/protocol.c | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/9p/client.c b/net/9p/client.c
index 0aa79faa9850..e2d314674965 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1321,7 +1321,8 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
 	if (wst->muid)
 		ret += strlen(wst->muid);
 
-	if (proto_version == p9_proto_2000u) {
+	if ((proto_version == p9_proto_2000u) ||
+		(proto_version == p9_proto_2000L)) {
 		ret += 2+4+4+4;	/* extension[s] n_uid[4] n_gid[4] n_muid[4] */
 		if (wst->extension)
 			ret += strlen(wst->extension);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index e7541d5b0118..77d3aab4036b 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -341,7 +341,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case '?':
-			if (proto_version != p9_proto_2000u)
+			if ((proto_version != p9_proto_2000u) &&
+				(proto_version != p9_proto_2000L))
 				return 0;
 			break;
 		default:
@@ -488,7 +489,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 			}
 			break;
 		case '?':
-			if (proto_version != p9_proto_2000u)
+			if ((proto_version != p9_proto_2000u) &&
+				(proto_version != p9_proto_2000L))
 				return 0;
 			break;
 		default:
-- 
cgit v1.2.3-58-ga151


From bda8e7752063cdbdd1d308bc1705400a8cec1aeb Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Thu, 25 Mar 2010 12:45:30 +0000
Subject: 9p: add 9P2000.L statfs operation

I made a V2 of this patch on top of my patches for VFS switches. The
change was adding v9fs_statfs pointer to v9fs_super_ops_dotl
instead of v9fs_super_ops.

statfs - get file system statistics

size[4] Tstatfs tag[2] fid[4]
size[4] Rstatfs tag[2] type[4] bsize[4] blocks[8] bfree[8] bavail[8]
                files[8] ffree[8] fsid[8] namelen[4]

The statfs message is used to request file system information returned
by the statfs(2) system call, which is used by df(1) to report file
system and disk space usage.

Signed-off-by: Jim Garlick <garlick@llnl.gov>
Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_super.c       | 39 ++++++++++++++++++++++++++++++++++++++-
 include/net/9p/9p.h     | 20 ++++++++++++++++++++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 98 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index cc3fa8c3aab6..be74d020436e 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -38,6 +38,7 @@
 #include <linux/idr.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/statfs.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -214,6 +215,42 @@ v9fs_umount_begin(struct super_block *sb)
 	v9fs_session_begin_cancel(v9ses);
 }
 
+static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct v9fs_session_info *v9ses;
+	struct p9_fid *fid;
+	struct p9_rstatfs rs;
+	int res;
+
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid)) {
+		res = PTR_ERR(fid);
+		goto done;
+	}
+
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	if (v9fs_proto_dotl(v9ses)) {
+		res = p9_client_statfs(fid, &rs);
+		if (res == 0) {
+			buf->f_type = rs.type;
+			buf->f_bsize = rs.bsize;
+			buf->f_blocks = rs.blocks;
+			buf->f_bfree = rs.bfree;
+			buf->f_bavail = rs.bavail;
+			buf->f_files = rs.files;
+			buf->f_ffree = rs.ffree;
+			buf->f_fsid.val[0] = rs.fsid & 0xFFFFFFFFUL;
+			buf->f_fsid.val[1] = (rs.fsid >> 32) & 0xFFFFFFFFUL;
+			buf->f_namelen = rs.namelen;
+		}
+		if (res != -ENOSYS)
+			goto done;
+	}
+	res = simple_statfs(dentry, buf);
+done:
+	return res;
+}
+
 static const struct super_operations v9fs_super_ops = {
 #ifdef CONFIG_9P_FSCACHE
 	.alloc_inode = v9fs_alloc_inode,
@@ -230,7 +267,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
 	.alloc_inode = v9fs_alloc_inode,
 	.destroy_inode = v9fs_destroy_inode,
 #endif
-	.statfs = simple_statfs,
+	.statfs = v9fs_statfs,
 	.clear_inode = v9fs_clear_inode,
 	.show_options = generic_show_options,
 	.umount_begin = v9fs_umount_begin,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index a7fb54808a23..59300dc22dd0 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -86,6 +86,8 @@ do { \
 
 /**
  * enum p9_msg_t - 9P message types
+ * @P9_TSTATFS: file system status request
+ * @P9_RSTATFS: file system status response
  * @P9_TVERSION: version handshake request
  * @P9_RVERSION: version handshake response
  * @P9_TAUTH: request to establish authentication channel
@@ -125,6 +127,8 @@ do { \
  */
 
 enum p9_msg_t {
+	P9_TSTATFS = 8,
+	P9_RSTATFS,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
@@ -350,6 +354,22 @@ struct p9_wstat {
 };
 
 /* Structures for Protocol Operations */
+struct p9_tstatfs {
+	u32 fid;
+};
+
+struct p9_rstatfs {
+	u32 type;
+	u32 bsize;
+	u64 blocks;
+	u64 bfree;
+	u64 bavail;
+	u64 files;
+	u64 ffree;
+	u64 fsid;
+	u32 namelen;
+};
+
 struct p9_tversion {
 	u32 msize;
 	struct p9_str version;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 4f3760afc20f..f83c0d99e592 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -195,6 +195,7 @@ struct p9_fid {
 	struct list_head dlist;	/* list of all fids attached to a dentry */
 };
 
+int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
 int p9_client_version(struct p9_client *);
 struct p9_client *p9_client_create(const char *dev_name, char *options);
 void p9_client_destroy(struct p9_client *clnt);
diff --git a/net/9p/client.c b/net/9p/client.c
index e2d314674965..430a1c4a7c6f 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1365,3 +1365,42 @@ error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_wstat);
+
+int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
+{
+	int err;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	err = 0;
+	clnt = fid->clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
+
+	req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	err = p9pdu_readf(req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type,
+		&sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,
+		&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);
+	if (err) {
+		p9pdu_dump(1, req->rc);
+		p9_free_req(clnt, req);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "
+		"blocks %llu bfree %llu bavail %llu files %llu ffree %llu "
+		"fsid %llu namelen %ld\n",
+		fid->fid, (long unsigned int)sb->type, (long int)sb->bsize,
+		sb->blocks, sb->bfree, sb->bavail, sb->files,  sb->ffree,
+		sb->fsid, (long int)sb->namelen);
+
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_statfs);
-- 
cgit v1.2.3-58-ga151


From 4681dbdacb5cdc4d3273c3a97a1858d6e00a5fe7 Mon Sep 17 00:00:00 2001
From: Sripathi Kodi <sripathik@in.ibm.com>
Date: Thu, 25 Mar 2010 12:47:26 +0000
Subject: 9p: add 9P2000.L rename operation

I made a V2 of this patch on top of my patches for VFS switches.
All the changes were due to change in some offsets.

rename - change name of file or directory

size[4] Trename tag[2] fid[4] newdirfid[4] name[s]
size[4] Rrename tag[2]

The rename message is used to change the name of a file, possibly moving it
to a new directory.  The 9P wstat message can only rename a file within the
same directory.

Signed-off-by: Jim Garlick <garlick@llnl.gov>
Signed-off-by: Sripathi Kodi <sripathik@in.ibm.com>
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/vfs_inode.c       |  7 +++++++
 include/net/9p/9p.h     | 13 +++++++++++++
 include/net/9p/client.h |  1 +
 net/9p/client.c         | 28 ++++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)

(limited to 'net')

diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 13a8ed6088b8..de9a39590b70 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -794,6 +794,13 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		goto clunk_olddir;
 	}
 
+	if (v9fs_proto_dotl(v9ses)) {
+		retval = p9_client_rename(oldfid, newdirfid,
+					(char *) new_dentry->d_name.name);
+		if (retval != -ENOSYS)
+			goto clunk_newdir;
+	}
+
 	/* 9P can only handle file rename in the same directory */
 	if (memcmp(&olddirfid->qid, &newdirfid->qid, sizeof(newdirfid->qid))) {
 		P9_DPRINTK(P9_DEBUG_ERROR,
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 59300dc22dd0..156c26bb8bd7 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -88,6 +88,8 @@ do { \
  * enum p9_msg_t - 9P message types
  * @P9_TSTATFS: file system status request
  * @P9_RSTATFS: file system status response
+ * @P9_TRENAME: rename request
+ * @P9_RRENAME: rename response
  * @P9_TVERSION: version handshake request
  * @P9_RVERSION: version handshake response
  * @P9_TAUTH: request to establish authentication channel
@@ -129,6 +131,8 @@ do { \
 enum p9_msg_t {
 	P9_TSTATFS = 8,
 	P9_RSTATFS,
+	P9_TRENAME = 20,
+	P9_RRENAME,
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
@@ -370,6 +374,15 @@ struct p9_rstatfs {
 	u32 namelen;
 };
 
+struct p9_trename {
+	u32 fid;
+	u32 newdirfid;
+	struct p9_str name;
+};
+
+struct p9_rrename {
+};
+
 struct p9_tversion {
 	u32 msize;
 	struct p9_str version;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index f83c0d99e592..7dd3ed85c782 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -196,6 +196,7 @@ struct p9_fid {
 };
 
 int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb);
+int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name);
 int p9_client_version(struct p9_client *);
 struct p9_client *p9_client_create(const char *dev_name, char *options);
 void p9_client_destroy(struct p9_client *clnt);
diff --git a/net/9p/client.c b/net/9p/client.c
index 430a1c4a7c6f..37c8da07a80b 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1404,3 +1404,31 @@ error:
 	return err;
 }
 EXPORT_SYMBOL(p9_client_statfs);
+
+int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)
+{
+	int err;
+	struct p9_req_t *req;
+	struct p9_client *clnt;
+
+	err = 0;
+	clnt = fid->clnt;
+
+	P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
+			fid->fid, newdirfid->fid, name);
+
+	req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid,
+			newdirfid->fid, name);
+	if (IS_ERR(req)) {
+		err = PTR_ERR(req);
+		goto error;
+	}
+
+	P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);
+
+	p9_free_req(clnt, req);
+error:
+	return err;
+}
+EXPORT_SYMBOL(p9_client_rename);
+
-- 
cgit v1.2.3-58-ga151


From 674b604cdd389252d89a14133b6ebf80165d1d55 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 19 May 2010 15:08:17 +0200
Subject: sunrpc: Pushdown the bkl from ioctl

Pushdown the bkl to rpc_pipe_ioctl.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Nfs <linux-nfs@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 net/sunrpc/rpc_pipe.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 20e30c6f8355..95ccbcf45d3e 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -27,6 +27,7 @@
 #include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/cache.h>
+#include <linux/smp_lock.h>
 
 static struct vfsmount *rpc_mount __read_mostly;
 static int rpc_mount_count;
@@ -309,8 +310,7 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
 }
 
 static int
-rpc_pipe_ioctl(struct inode *ino, struct file *filp,
-		unsigned int cmd, unsigned long arg)
+rpc_pipe_ioctl_unlocked(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
 	int len;
@@ -331,13 +331,25 @@ rpc_pipe_ioctl(struct inode *ino, struct file *filp,
 	}
 }
 
+static long
+rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	long ret;
+
+	lock_kernel();
+	ret = rpc_pipe_ioctl_unlocked(filp, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
+
 static const struct file_operations rpc_pipe_fops = {
 	.owner		= THIS_MODULE,
 	.llseek		= no_llseek,
 	.read		= rpc_pipe_read,
 	.write		= rpc_pipe_write,
 	.poll		= rpc_pipe_poll,
-	.ioctl		= rpc_pipe_ioctl,
+	.unlocked_ioctl	= rpc_pipe_ioctl,
 	.open		= rpc_pipe_open,
 	.release	= rpc_pipe_release,
 };
-- 
cgit v1.2.3-58-ga151


From 9918ff26b301e9a57f25fb12b44a46ad0c1e8f8f Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 19 May 2010 15:08:17 +0200
Subject: sunrpc: Pushdown the bkl from sunrpc cache ioctl

Pushdown the bkl to cache_ioctl_pipefs.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Neil Brown <neilb@suse.de>
Cc: Nfs <linux-nfs@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: John Kacur <jkacur@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
---
 net/sunrpc/cache.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 39bddba53ba1..91b1adec0d67 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -33,6 +33,7 @@
 #include <linux/sunrpc/cache.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/smp_lock.h>
 
 #define	 RPCDBG_FACILITY RPCDBG_CACHE
 
@@ -1525,12 +1526,18 @@ static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait)
 	return cache_poll(filp, wait, cd);
 }
 
-static int cache_ioctl_pipefs(struct inode *inode, struct file *filp,
+static long cache_ioctl_pipefs(struct file *filp,
 			      unsigned int cmd, unsigned long arg)
 {
+	struct inode *inode = filp->f_dentry->d_inode;
 	struct cache_detail *cd = RPC_I(inode)->private;
+	long ret;
 
-	return cache_ioctl(inode, filp, cmd, arg, cd);
+	lock_kernel();
+	ret = cache_ioctl(inode, filp, cmd, arg, cd);
+	unlock_kernel();
+
+	return ret;
 }
 
 static int cache_open_pipefs(struct inode *inode, struct file *filp)
@@ -1553,7 +1560,7 @@ const struct file_operations cache_file_operations_pipefs = {
 	.read		= cache_read_pipefs,
 	.write		= cache_write_pipefs,
 	.poll		= cache_poll_pipefs,
-	.ioctl		= cache_ioctl_pipefs, /* for FIONREAD */
+	.unlocked_ioctl	= cache_ioctl_pipefs, /* for FIONREAD */
 	.open		= cache_open_pipefs,
 	.release	= cache_release_pipefs,
 };
-- 
cgit v1.2.3-58-ga151


From a6c0f8217c17d46da22fa56923f3cbd03615cb7c Mon Sep 17 00:00:00 2001
From: Denis Kirjanov <dkirjanov@hera.kernel.org>
Date: Sun, 23 May 2010 05:45:45 +0000
Subject: ieee802154: Fix possible NULL pointer dereference in wpan_phy_alloc

Check for NULL pointer after kzalloc

Signed-off-by: Denis Kirjanov <dkirjanov@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ieee802154/wpan-class.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c
index 3d803a1b9fb6..1627ef2e8522 100644
--- a/net/ieee802154/wpan-class.c
+++ b/net/ieee802154/wpan-class.c
@@ -147,13 +147,15 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
 	struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size,
 			GFP_KERNEL);
 
+	if (!phy)
+		goto out;
 	mutex_lock(&wpan_phy_mutex);
 	phy->idx = wpan_phy_idx++;
 	if (unlikely(!wpan_phy_idx_valid(phy->idx))) {
 		wpan_phy_idx--;
 		mutex_unlock(&wpan_phy_mutex);
 		kfree(phy);
-		return NULL;
+		goto out;
 	}
 	mutex_unlock(&wpan_phy_mutex);
 
@@ -168,6 +170,9 @@ struct wpan_phy *wpan_phy_alloc(size_t priv_size)
 	phy->current_page = 0; /* for compatibility */
 
 	return phy;
+
+out:
+	return NULL;
 }
 EXPORT_SYMBOL(wpan_phy_alloc);
 
-- 
cgit v1.2.3-58-ga151


From 53b0f08042f04813cd1a7473dacd3edfacb28eb3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 22 May 2010 20:37:44 +0000
Subject: net_sched: Fix qdisc_notify()

Ben Pfaff reported a kernel oops and provided a test program to
reproduce it.

https://kerneltrap.org/mailarchive/linux-netdev/2010/5/21/6277805

tc_fill_qdisc() should not be called for builtin qdisc, or it
dereference a NULL pointer to get device ifindex.

Fix is to always use tc_qdisc_dump_ignore() before calling
tc_fill_qdisc().

Reported-by: Ben Pfaff <blp@nicira.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index fe35c1f338c2..b9e8c3b7d406 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1195,6 +1195,11 @@ nla_put_failure:
 	return -1;
 }
 
+static bool tc_qdisc_dump_ignore(struct Qdisc *q)
+{
+	return (q->flags & TCQ_F_BUILTIN) ? true : false;
+}
+
 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 			struct nlmsghdr *n, u32 clid,
 			struct Qdisc *old, struct Qdisc *new)
@@ -1206,11 +1211,11 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
 	if (!skb)
 		return -ENOBUFS;
 
-	if (old && old->handle) {
+	if (old && !tc_qdisc_dump_ignore(old)) {
 		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
 			goto err_out;
 	}
-	if (new) {
+	if (new && !tc_qdisc_dump_ignore(new)) {
 		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
 			goto err_out;
 	}
@@ -1223,11 +1228,6 @@ err_out:
 	return -EINVAL;
 }
 
-static bool tc_qdisc_dump_ignore(struct Qdisc *q)
-{
-	return (q->flags & TCQ_F_BUILTIN) ? true : false;
-}
-
 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			      struct netlink_callback *cb,
 			      int *q_idx_p, int s_q_idx)
-- 
cgit v1.2.3-58-ga151


From eb1669aed950cb5f34622bcceba66bef5980e97a Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier@gentoo.org>
Date: Fri, 21 May 2010 10:45:20 +0000
Subject: net-caif: drop redundant Kconfig entries

There is already a submenu entry that is always displayed, so there is
no need to also show a dedicated CAIF comment.

Drop dead commented code while we're here, and change the submenu text
to better match the style everyone else is using.

Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/Kconfig | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index cd1daf6008bd..ed651786f16b 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -2,10 +2,8 @@
 # CAIF net configurations
 #
 
-#menu "CAIF Support"
-comment "CAIF Support"
 menuconfig CAIF
-	tristate "Enable CAIF support"
+	tristate "CAIF support"
 	select CRC_CCITT
 	default n
 	---help---
@@ -45,4 +43,3 @@ config CAIF_NETDEV
 	If unsure say Y.
 
 endif
-#endmenu
-- 
cgit v1.2.3-58-ga151


From 253683bbfb6bc5864417c8c35cb6ef13b5e259e6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 21 May 2010 02:25:27 +0000
Subject: rtnetlink: Fix error handling in do_setlink()

Commit c02db8c6290bb992442fec1407643c94cc414375:

	Author:  Chris Wright <chrisw@sous-sol.org>
	Date:    Sun May 16 01:05:45 2010 -0700
	Subject: rtnetlink: make SR-IOV VF interface symmetric

adds broken error handling to do_setlink() in net/core/rtnetlink.c.  The
problem is the following chunk of code:

	if (tb[IFLA_VFINFO_LIST]) {
		struct nlattr *attr;
		int rem;
		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
			if (nla_type(attr) != IFLA_VF_INFO)
  ---->				goto errout;
			err = do_setvfinfo(dev, attr);
			if (err < 0)
				goto errout;
			modified = 1;
		}
	}

which can get to errout without setting err, resulting in the following error:

net/core/rtnetlink.c: In function 'do_setlink':
net/core/rtnetlink.c:904: warning: 'err' may be used uninitialized in this function

Change the code to return -EINVAL in this case.  Note that this might not be
the appropriate error though.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Chris Wright <chrisw@sous-sol.org>
cc: David S. Miller <davem@davemloft.net>
Acked-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e4b9870e4706..7ab86f3a1ea4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1199,8 +1199,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		struct nlattr *attr;
 		int rem;
 		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
-			if (nla_type(attr) != IFLA_VF_INFO)
+			if (nla_type(attr) != IFLA_VF_INFO) {
+				err = -EINVAL;
 				goto errout;
+			}
 			err = do_setvfinfo(dev, attr);
 			if (err < 0)
 				goto errout;
-- 
cgit v1.2.3-58-ga151


From 8ce6cebc2f126f3ecf2d80746ea54245adf18057 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@free.fr>
Date: Wed, 19 May 2010 10:12:19 +0000
Subject: net-2.6 : V2 - fix dev_get_valid_name

the commit:

commit d90310243fd750240755e217c5faa13e24f41536
Author: Octavian Purdila <opurdila@ixiacom.com>
Date:   Wed Nov 18 02:36:59 2009 +0000

    net: device name allocation cleanups

introduced a bug when there is a hash collision making impossible
to rename a device with eth%d. This bug is very hard to reproduce
and appears rarely.

The problem is coming from we don't pass a temporary buffer to
__dev_alloc_name but 'dev->name' which is modified by the function.

A detailed explanation is here:

http://marc.info/?l=linux-netdev&m=127417784011987&w=2

Changelog:
 V2 : replaced strings comparison by pointers comparison

Signed-off-by: Daniel Lezcano <daniel.lezcano@free.fr>
Reviewed-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0aab66d68b19..07a48e2bf7db 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -954,18 +954,22 @@ int dev_alloc_name(struct net_device *dev, const char *name)
 }
 EXPORT_SYMBOL(dev_alloc_name);
 
-static int dev_get_valid_name(struct net *net, const char *name, char *buf,
-			      bool fmt)
+static int dev_get_valid_name(struct net_device *dev, const char *name, bool fmt)
 {
+	struct net *net;
+
+	BUG_ON(!dev_net(dev));
+	net = dev_net(dev);
+
 	if (!dev_valid_name(name))
 		return -EINVAL;
 
 	if (fmt && strchr(name, '%'))
-		return __dev_alloc_name(net, name, buf);
+		return dev_alloc_name(dev, name);
 	else if (__dev_get_by_name(net, name))
 		return -EEXIST;
-	else if (buf != name)
-		strlcpy(buf, name, IFNAMSIZ);
+	else if (dev->name != name)
+		strlcpy(dev->name, name, IFNAMSIZ);
 
 	return 0;
 }
@@ -997,7 +1001,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
 
 	memcpy(oldname, dev->name, IFNAMSIZ);
 
-	err = dev_get_valid_name(net, newname, dev->name, 1);
+	err = dev_get_valid_name(dev, newname, 1);
 	if (err < 0)
 		return err;
 
@@ -4965,7 +4969,7 @@ int register_netdevice(struct net_device *dev)
 		}
 	}
 
-	ret = dev_get_valid_name(net, dev->name, dev->name, 0);
+	ret = dev_get_valid_name(dev, dev->name, 0);
 	if (ret)
 		goto err_uninit;
 
@@ -5574,7 +5578,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
-		if (dev_get_valid_name(net, pat, dev->name, 1))
+		if (dev_get_valid_name(dev, pat, 1))
 			goto out;
 	}
 
-- 
cgit v1.2.3-58-ga151


From 9e4b816bc31962ebbb8784d602acd5fa25a08ad8 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 21 May 2010 02:16:07 +0000
Subject: caif: Bugfix - wait_ev*_timeout returns long.

Discovered bug when testing on 64bit architecture.
Fixed by using long to store result from wait_event_interruptible_timeout.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index c3a70c5c893a..77e99568acda 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -920,17 +920,17 @@ wait_connect:
 	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
 
 	release_sock(sk);
-	err = wait_event_interruptible_timeout(*sk_sleep(sk),
+	err = -ERESTARTSYS;
+	timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
 			sk->sk_state != CAIF_CONNECTING,
 			timeo);
 	lock_sock(sk);
-	if (err < 0)
+	if (timeo < 0)
 		goto out; /* -ERESTARTSYS */
-	if (err == 0 && sk->sk_state != CAIF_CONNECTED) {
-		err = -ETIMEDOUT;
-		goto out;
-	}
 
+	err = -ETIMEDOUT;
+	if (timeo == 0 && sk->sk_state != CAIF_CONNECTED)
+		goto out;
 	if (sk->sk_state != CAIF_CONNECTED) {
 		sock->state = SS_UNCONNECTED;
 		err = sock_error(sk);
@@ -945,7 +945,6 @@ out:
 	return err;
 }
 
-
 /*
  * caif_release() - Disconnect a CAIF Socket
  * Copied and modified af_irda.c:irda_release().
-- 
cgit v1.2.3-58-ga151


From 7aecf4944f2c05aafb73b4820e469c74b4ec8517 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 21 May 2010 02:16:08 +0000
Subject: caif: Bugfix - use standard Linux lists

Discovered bug when running high number of parallel connect requests.
Replace buggy home brewed list with linux/list.h.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfctrl.h |  4 +--
 net/caif/cfctrl.c         | 92 ++++++++++++++---------------------------------
 2 files changed, 28 insertions(+), 68 deletions(-)

(limited to 'net')

diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h
index 997603f2bf4c..9402543fc20d 100644
--- a/include/net/caif/cfctrl.h
+++ b/include/net/caif/cfctrl.h
@@ -94,8 +94,8 @@ struct cfctrl_request_info {
 	enum cfctrl_cmd cmd;
 	u8 channel_id;
 	struct cfctrl_link_param param;
-	struct cfctrl_request_info *next;
 	struct cflayer *client_layer;
+	struct list_head list;
 };
 
 struct cfctrl {
@@ -103,7 +103,7 @@ struct cfctrl {
 	struct cfctrl_rsp res;
 	atomic_t req_seq_no;
 	atomic_t rsp_seq_no;
-	struct cfctrl_request_info *first_req;
+	struct list_head list;
 	/* Protects from simultaneous access to first_req list */
 	spinlock_t info_list_lock;
 #ifndef CAIF_NO_LOOP
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 0ffe1e1ce901..fcfda98a5e6d 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -44,13 +44,14 @@ struct cflayer *cfctrl_create(void)
 	dev_info.id = 0xff;
 	memset(this, 0, sizeof(*this));
 	cfsrvl_init(&this->serv, 0, &dev_info);
-	spin_lock_init(&this->info_list_lock);
 	atomic_set(&this->req_seq_no, 1);
 	atomic_set(&this->rsp_seq_no, 1);
 	this->serv.layer.receive = cfctrl_recv;
 	sprintf(this->serv.layer.name, "ctrl");
 	this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
 	spin_lock_init(&this->loop_linkid_lock);
+	spin_lock_init(&this->info_list_lock);
+	INIT_LIST_HEAD(&this->list);
 	this->loop_linkid = 1;
 	return &this->serv.layer;
 }
@@ -112,20 +113,10 @@ bool cfctrl_req_eq(struct cfctrl_request_info *r1,
 void cfctrl_insert_req(struct cfctrl *ctrl,
 			      struct cfctrl_request_info *req)
 {
-	struct cfctrl_request_info *p;
 	spin_lock(&ctrl->info_list_lock);
-	req->next = NULL;
 	atomic_inc(&ctrl->req_seq_no);
 	req->sequence_no = atomic_read(&ctrl->req_seq_no);
-	if (ctrl->first_req == NULL) {
-		ctrl->first_req = req;
-		spin_unlock(&ctrl->info_list_lock);
-		return;
-	}
-	p = ctrl->first_req;
-	while (p->next != NULL)
-		p = p->next;
-	p->next = req;
+	list_add_tail(&req->list, &ctrl->list);
 	spin_unlock(&ctrl->info_list_lock);
 }
 
@@ -133,46 +124,28 @@ void cfctrl_insert_req(struct cfctrl *ctrl,
 struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
 					      struct cfctrl_request_info *req)
 {
-	struct cfctrl_request_info *p;
-	struct cfctrl_request_info *ret;
+	struct cfctrl_request_info *p, *tmp, *first;
 
 	spin_lock(&ctrl->info_list_lock);
-	if (ctrl->first_req == NULL) {
-		spin_unlock(&ctrl->info_list_lock);
-		return NULL;
-	}
-
-	if (cfctrl_req_eq(req, ctrl->first_req)) {
-		ret = ctrl->first_req;
-		caif_assert(ctrl->first_req);
-		atomic_set(&ctrl->rsp_seq_no,
-				 ctrl->first_req->sequence_no);
-		ctrl->first_req = ctrl->first_req->next;
-		spin_unlock(&ctrl->info_list_lock);
-		return ret;
-	}
+	first = list_first_entry(&ctrl->list, struct cfctrl_request_info, list);
 
-	p = ctrl->first_req;
-
-	while (p->next != NULL) {
-		if (cfctrl_req_eq(req, p->next)) {
-			pr_warning("CAIF: %s(): Requests are not "
+	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
+		if (cfctrl_req_eq(req, p)) {
+			if (p != first)
+				pr_warning("CAIF: %s(): Requests are not "
 					"received in order\n",
 					__func__);
-			ret = p->next;
+
 			atomic_set(&ctrl->rsp_seq_no,
-					p->next->sequence_no);
-			p->next = p->next->next;
-			spin_unlock(&ctrl->info_list_lock);
-			return ret;
+					 p->sequence_no);
+			list_del(&p->list);
+			goto out;
 		}
-		p = p->next;
 	}
+	p = NULL;
+out:
 	spin_unlock(&ctrl->info_list_lock);
-
-	pr_warning("CAIF: %s(): Request does not match\n",
-		   __func__);
-	return NULL;
+	return p;
 }
 
 struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
@@ -388,31 +361,18 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
 
 void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
 {
-	struct cfctrl_request_info *p, *req;
+	struct cfctrl_request_info *p, *tmp;
 	struct cfctrl *ctrl = container_obj(layr);
 	spin_lock(&ctrl->info_list_lock);
-
-	if (ctrl->first_req == NULL) {
-		spin_unlock(&ctrl->info_list_lock);
-		return;
-	}
-
-	if (ctrl->first_req->client_layer == adap_layer) {
-
-		req = ctrl->first_req;
-		ctrl->first_req = ctrl->first_req->next;
-		kfree(req);
-	}
-
-	p = ctrl->first_req;
-	while (p != NULL && p->next != NULL) {
-		if (p->next->client_layer == adap_layer) {
-
-			req = p->next;
-			p->next = p->next->next;
-			kfree(p->next);
+	pr_warning("CAIF: %s(): enter\n", __func__);
+
+	list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
+		if (p->client_layer == adap_layer) {
+			pr_warning("CAIF: %s(): cancel req :%d\n", __func__,
+					p->sequence_no);
+			list_del(&p->list);
+			kfree(p);
 		}
-		p = p->next;
 	}
 
 	spin_unlock(&ctrl->info_list_lock);
@@ -634,7 +594,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 	case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
 		spin_lock(&this->info_list_lock);
-		if (this->first_req != NULL) {
+		if (!list_empty(&this->list)) {
 			pr_debug("CAIF: %s(): Received flow off in "
 				   "control layer", __func__);
 		}
-- 
cgit v1.2.3-58-ga151


From 638e628a600a5c542d46dfb06771cf9c229ef5f3 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 21 May 2010 02:16:09 +0000
Subject: caif: Bugfix - handle mem-allocation failures

Discovered bugs when injecting slab allocation failures.
Add checks on all memory allocation.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfpkt_skbuff.c | 25 +++++++++++++++++--------
 net/caif/cfserl.c       |  3 ++-
 net/caif/cfsrvl.c       |  6 ++++++
 3 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 83fff2ff6658..a6fdf899741a 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -238,6 +238,7 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
 	struct sk_buff *lastskb;
 	u8 *to;
 	const u8 *data = data2;
+	int ret;
 	if (unlikely(is_erronous(pkt)))
 		return -EPROTO;
 	if (unlikely(skb_headroom(skb) < len)) {
@@ -246,9 +247,10 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
 	}
 
 	/* Make sure data is writable */
-	if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
+	ret = skb_cow_data(skb, 0, &lastskb);
+	if (unlikely(ret < 0)) {
 		PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
-		return -EPROTO;
+		return ret;
 	}
 
 	to = skb_push(skb, len);
@@ -316,6 +318,8 @@ EXPORT_SYMBOL(cfpkt_setlen);
 struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len)
 {
 	struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
+	if (!pkt)
+		return NULL;
 	if (unlikely(data != NULL))
 		cfpkt_add_body(pkt, data, len);
 	return pkt;
@@ -344,12 +348,13 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
 
 	if (dst->tail + neededtailspace > dst->end) {
 		/* Create a dumplicate of 'dst' with more tail space */
+		struct cfpkt *tmppkt;
 		dstlen = skb_headlen(dst);
 		createlen = dstlen + neededtailspace;
-		tmp = pkt_to_skb(
-			cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX));
-		if (!tmp)
+		tmppkt = cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX);
+		if (tmppkt == NULL)
 			return NULL;
+		tmp = pkt_to_skb(tmppkt);
 		skb_set_tail_pointer(tmp, dstlen);
 		tmp->len = dstlen;
 		memcpy(tmp->data, dst->data, dstlen);
@@ -368,6 +373,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
 {
 	struct sk_buff *skb2;
 	struct sk_buff *skb = pkt_to_skb(pkt);
+	struct cfpkt *tmppkt;
 	u8 *split = skb->data + pos;
 	u16 len2nd = skb_tail_pointer(skb) - split;
 
@@ -381,9 +387,12 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
 	}
 
 	/* Create a new packet for the second part of the data */
-	skb2 = pkt_to_skb(
-		cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX,
-				 PKT_PREFIX));
+	tmppkt = cfpkt_create_pfx(len2nd + PKT_PREFIX + PKT_POSTFIX,
+				  PKT_PREFIX);
+	if (tmppkt == NULL)
+		return NULL;
+	skb2 = pkt_to_skb(tmppkt);
+
 
 	if (skb2 == NULL)
 		return NULL;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 06029ea2da2f..cb4325a3dc83 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -67,6 +67,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
 		layr->incomplete_frm =
 		    cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
 		pkt = layr->incomplete_frm;
+		if (pkt == NULL)
+			return -ENOMEM;
 	} else {
 		pkt = newpkt;
 	}
@@ -154,7 +156,6 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
 			if (layr->usestx) {
 				if (tail_pkt != NULL)
 					pkt = cfpkt_append(pkt, tail_pkt, 0);
-
 				/* Start search for next STX if frame failed */
 				continue;
 			} else {
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index aff31f34528f..6e5b7079a684 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -123,6 +123,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
 			struct caif_payload_info *info;
 			u8 flow_off = SRVL_FLOW_OFF;
 			pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
+			if (!pkt) {
+				pr_warning("CAIF: %s(): Out of memory\n",
+					__func__);
+				return -ENOMEM;
+			}
+
 			if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
 				pr_err("CAIF: %s(): Packet is erroneous!\n",
 					__func__);
-- 
cgit v1.2.3-58-ga151


From ca6a09f25cde1d2d86f8e821cf69f4f650c30dbf Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 21 May 2010 02:16:10 +0000
Subject: caif: Bugfix - Poll can't return POLLHUP while connecting.

Discovered bug when testing async connect.
While connecting poll should not return POLLHUP,
but POLLOUT when connected.
Also fixed the sysfs flow-control-counters.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 77e99568acda..732897d64f41 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -138,7 +138,7 @@ void caif_flow_ctrl(struct sock *sk, int mode)
 {
 	struct caifsock *cf_sk;
 	cf_sk = container_of(sk, struct caifsock, sk);
-	if (cf_sk->layer.dn)
+	if (cf_sk->layer.dn && cf_sk->layer.dn->modemcmd)
 		cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode);
 }
 
@@ -162,9 +162,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			atomic_read(&cf_sk->sk.sk_rmem_alloc),
 			sk_rcvbuf_lowwater(cf_sk));
 		set_rx_flow_off(cf_sk);
-		if (cf_sk->layer.dn)
-			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-						CAIF_MODEMCMD_FLOW_OFF_REQ);
+		dbfs_atomic_inc(&cnt.num_rx_flow_off);
+		caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
 	}
 
 	err = sk_filter(sk, skb);
@@ -175,9 +174,8 @@ int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		trace_printk("CAIF: %s():"
 			" sending flow OFF due to rmem_schedule\n",
 			__func__);
-		if (cf_sk->layer.dn)
-			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-						CAIF_MODEMCMD_FLOW_OFF_REQ);
+		dbfs_atomic_inc(&cnt.num_rx_flow_off);
+		caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
 	}
 	skb->dev = NULL;
 	skb_set_owner_r(skb, sk);
@@ -285,16 +283,13 @@ static void caif_check_flow_release(struct sock *sk)
 {
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-	if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL)
-		return;
 	if (rx_flow_is_on(cf_sk))
 		return;
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) {
 			dbfs_atomic_inc(&cnt.num_rx_flow_on);
 			set_rx_flow_on(cf_sk);
-			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-						CAIF_MODEMCMD_FLOW_ON_REQ);
+			caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ);
 	}
 }
 /*
@@ -1018,10 +1013,6 @@ static unsigned int caif_poll(struct file *file,
 		(sk->sk_shutdown & RCV_SHUTDOWN))
 		mask |= POLLIN | POLLRDNORM;
 
-	/* Connection-based need to check for termination and startup */
-	if (sk->sk_state == CAIF_DISCONNECTED)
-		mask |= POLLHUP;
-
 	/*
 	 * we set writable also when the other side has shut down the
 	 * connection. This prevents stuck sockets.
-- 
cgit v1.2.3-58-ga151


From a9a8f1070d8733b37418b3a2d58df4e771b61f88 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 21 May 2010 02:16:11 +0000
Subject: caif: Bugfix - missing spin_unlock

Splint found missing spin_unlock.
Corrected this an some other trivial split warnings.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 10 +++++-----
 net/caif/cfmuxl.c      |  3 ++-
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 732897d64f41..691a5710974e 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -60,7 +60,7 @@ struct debug_fs_counter {
 	atomic_t num_rx_flow_off;
 	atomic_t num_rx_flow_on;
 };
-struct debug_fs_counter cnt;
+static struct debug_fs_counter cnt;
 #define	dbfs_atomic_inc(v) atomic_inc(v)
 #define	dbfs_atomic_dec(v) atomic_dec(v)
 #else
@@ -128,13 +128,13 @@ static void caif_read_unlock(struct sock *sk)
 	mutex_unlock(&cf_sk->readlock);
 }
 
-int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
+static int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
 {
 	/* A quarter of full buffer is used a low water mark */
 	return cf_sk->sk.sk_rcvbuf / 4;
 }
 
-void caif_flow_ctrl(struct sock *sk, int mode)
+static void caif_flow_ctrl(struct sock *sk, int mode)
 {
 	struct caifsock *cf_sk;
 	cf_sk = container_of(sk, struct caifsock, sk);
@@ -146,7 +146,7 @@ void caif_flow_ctrl(struct sock *sk, int mode)
  * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
  * not dropped, but CAIF is sending flow off instead.
  */
-int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int err;
 	int skb_len;
@@ -1184,7 +1184,7 @@ static struct net_proto_family caif_family_ops = {
 	.owner = THIS_MODULE,
 };
 
-int af_caif_init(void)
+static int af_caif_init(void)
 {
 	int err = sock_register(&caif_family_ops);
 	if (!err)
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 7372f27f1d32..80c8d332b258 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -174,10 +174,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
 	spin_lock(&muxl->receive_lock);
 	up = get_up(muxl, id);
 	if (up == NULL)
-		return NULL;
+		goto out;
 	memset(muxl->up_cache, 0, sizeof(muxl->up_cache));
 	list_del(&up->node);
 	cfsrvl_put(up);
+out:
 	spin_unlock(&muxl->receive_lock);
 	return up;
 }
-- 
cgit v1.2.3-58-ga151


From dcda138d2f27e32bd0d6250cc42839b0d70bb4b8 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Fri, 21 May 2010 02:16:12 +0000
Subject: caif: Bugfix - use MSG_TRUNC in receive

Fixed handling when skb don't fit in user buffer,
instead of returning -EMSGSIZE, the buffer is truncated (just
as unix seqpakcet does).

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 47 ++++++++++++++++++-----------------------------
 1 file changed, 18 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 691a5710974e..3d0e09584fae 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -292,53 +292,42 @@ static void caif_check_flow_release(struct sock *sk)
 			caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_ON_REQ);
 	}
 }
+
 /*
- * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer
- * has sufficient size.
+ * Copied from unix_dgram_recvmsg, but removed credit checks,
+ * changed locking, address handling and added MSG_TRUNC.
  */
-
 static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
-				struct msghdr *m, size_t buf_len, int flags)
+				struct msghdr *m, size_t len, int flags)
 
 {
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
-	int ret = 0;
-	int len;
+	int ret;
+	int copylen;
 
-	if (unlikely(!buf_len))
-		return -EINVAL;
+	ret = -EOPNOTSUPP;
+	if (m->msg_flags&MSG_OOB)
+		goto read_error;
 
 	skb = skb_recv_datagram(sk, flags, 0 , &ret);
 	if (!skb)
 		goto read_error;
-
-	len = skb->len;
-
-	if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) {
-		len = buf_len;
-		/*
-		 * Push skb back on receive queue if buffer too small.
-		 * This has a built-in race where multi-threaded receive
-		 * may get packet in wrong order, but multiple read does
-		 * not really guarantee ordered delivery anyway.
-		 * Let's optimize for speed without taking locks.
-		 */
-
-		skb_queue_head(&sk->sk_receive_queue, skb);
-		ret = -EMSGSIZE;
-		goto read_error;
+	copylen = skb->len;
+	if (len < copylen) {
+		m->msg_flags |= MSG_TRUNC;
+		copylen = len;
 	}
 
-	ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
+	ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen);
 	if (ret)
-		goto read_error;
+		goto out_free;
 
+	ret = (flags & MSG_TRUNC) ? skb->len : copylen;
+out_free:
 	skb_free_datagram(sk, skb);
-
 	caif_check_flow_release(sk);
-
-	return len;
+	return ret;
 
 read_error:
 	return ret;
-- 
cgit v1.2.3-58-ga151


From f845172531fb7410c7fb7780b1a6e51ee6df7d52 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 24 May 2010 00:12:34 -0700
Subject: cls_cgroup: Store classid in struct sock

Up until now cls_cgroup has relied on fetching the classid out of
the current executing thread.  This runs into trouble when a packet
processing is delayed in which case it may execute out of another
thread's context.

Furthermore, even when a packet is not delayed we may fail to
classify it if soft IRQs have been disabled, because this scenario
is indistinguishable from one where a packet unrelated to the
current thread is processed by a real soft IRQ.

In fact, the current semantics is inherently broken, as a single
skb may be constructed out of the writes of two different tasks.
A different manifestation of this problem is when the TCP stack
transmits in response of an incoming ACK.  This is currently
unclassified.

As we already have a concept of packet ownership for accounting
purposes in the skb->sk pointer, this is a natural place to store
the classid in a persistent manner.

This patch adds the cls_cgroup classid in struct sock, filling up
an existing hole on 64-bit :)

The value is set at socket creation time.  So all sockets created
via socket(2) automatically gains the ID of the thread creating it.
Whenever another process touches the socket by either reading or
writing to it, we will change the socket classid to that of the
process if it has a valid (non-zero) classid.

For sockets created on inbound connections through accept(2), we
inherit the classid of the original listening socket through
sk_clone, possibly preceding the actual accept(2) call.

In order to minimise risks, I have not made this the authoritative
classid.  For now it is only used as a backup when we execute
with soft IRQs disabled.  Once we're completely happy with its
semantics we can use it as the sole classid.

Footnote: I have rearranged the error path on cls_group module
creation.  If we didn't do this, then there is a window where
someone could create a tc rule using cls_group before the cgroup
subsystem has been registered.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/cls_cgroup.h | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/net/sock.h       | 10 +++++++-
 net/core/sock.c          | 18 ++++++++++++++
 net/sched/cls_cgroup.c   | 50 ++++++++++++++++++++++++++------------
 net/socket.c             |  9 +++++++
 5 files changed, 133 insertions(+), 17 deletions(-)
 create mode 100644 include/net/cls_cgroup.h

(limited to 'net')

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
new file mode 100644
index 000000000000..ef2df1475b51
--- /dev/null
+++ b/include/net/cls_cgroup.h
@@ -0,0 +1,63 @@
+/*
+ * cls_cgroup.h			Control Group Classifier
+ *
+ * Authors:	Thomas Graf <tgraf@suug.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _NET_CLS_CGROUP_H
+#define _NET_CLS_CGROUP_H
+
+#include <linux/cgroup.h>
+#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
+
+#ifdef CONFIG_CGROUPS
+struct cgroup_cls_state
+{
+	struct cgroup_subsys_state css;
+	u32 classid;
+};
+
+#ifdef CONFIG_NET_CLS_CGROUP
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+	if (in_interrupt())
+		return 0;
+
+	return container_of(task_subsys_state(p, net_cls_subsys_id),
+			    struct cgroup_cls_state, css).classid;
+}
+#else
+extern int net_cls_subsys_id;
+
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+	int id;
+	u32 classid;
+
+	if (in_interrupt())
+		return 0;
+
+	rcu_read_lock();
+	id = rcu_dereference(net_cls_subsys_id);
+	if (id >= 0)
+		classid = container_of(task_subsys_state(p, id),
+				       struct cgroup_cls_state, css)->classid;
+	rcu_read_unlock();
+
+	return classid;
+}
+#endif
+#else
+static inline u32 task_cls_classid(struct task_struct *p)
+{
+	return 0;
+}
+#endif
+#endif  /* _NET_CLS_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 5697caf8cc76..d24f382cb712 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -312,7 +312,7 @@ struct sock {
 	void			*sk_security;
 #endif
 	__u32			sk_mark;
-	/* XXX 4 bytes hole on 64 bit */
+	u32			sk_classid;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
@@ -1074,6 +1074,14 @@ extern void *sock_kmalloc(struct sock *sk, int size,
 extern void sock_kfree_s(struct sock *sk, void *mem, int size);
 extern void sk_send_sigurg(struct sock *sk);
 
+#ifdef CONFIG_CGROUPS
+extern void sock_update_classid(struct sock *sk);
+#else
+static inline void sock_update_classid(struct sock *sk)
+{
+}
+#endif
+
 /*
  * Functions to fill in entries in struct proto_ops when a protocol
  * does not implement a particular function.
diff --git a/net/core/sock.c b/net/core/sock.c
index bf88a167c8f2..a05ae7f9771e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -123,6 +123,7 @@
 #include <linux/net_tstamp.h>
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
+#include <net/cls_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -217,6 +218,11 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
+#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+int net_cls_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_cls_subsys_id);
+#endif
+
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
@@ -1050,6 +1056,16 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 	module_put(owner);
 }
 
+#ifdef CONFIG_CGROUPS
+void sock_update_classid(struct sock *sk)
+{
+	u32 classid = task_cls_classid(current);
+
+	if (classid && classid != sk->sk_classid)
+		sk->sk_classid = classid;
+}
+#endif
+
 /**
  *	sk_alloc - All socket objects are allocated here
  *	@net: the applicable net namespace
@@ -1073,6 +1089,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_lock_init(sk);
 		sock_net_set(sk, get_net(net));
 		atomic_set(&sk->sk_wmem_alloc, 1);
+
+		sock_update_classid(sk);
 	}
 
 	return sk;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 221180384fd7..78ef2c5e130b 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -16,14 +16,11 @@
 #include <linux/errno.h>
 #include <linux/skbuff.h>
 #include <linux/cgroup.h>
+#include <linux/rcupdate.h>
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
-
-struct cgroup_cls_state
-{
-	struct cgroup_subsys_state css;
-	u32 classid;
-};
+#include <net/sock.h>
+#include <net/cls_cgroup.h>
 
 static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
 					       struct cgroup *cgrp);
@@ -112,6 +109,10 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	struct cls_cgroup_head *head = tp->root;
 	u32 classid;
 
+	rcu_read_lock();
+	classid = task_cls_state(current)->classid;
+	rcu_read_unlock();
+
 	/*
 	 * Due to the nature of the classifier it is required to ignore all
 	 * packets originating from softirq context as accessing `current'
@@ -122,12 +123,12 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
 	 * calls by looking at the number of nested bh disable calls because
 	 * softirqs always disables bh.
 	 */
-	if (softirq_count() != SOFTIRQ_OFFSET)
-		return -1;
-
-	rcu_read_lock();
-	classid = task_cls_state(current)->classid;
-	rcu_read_unlock();
+	if (softirq_count() != SOFTIRQ_OFFSET) {
+		/* If there is an sk_classid we'll use that. */
+		if (!skb->sk)
+			return -1;
+		classid = skb->sk->sk_classid;
+	}
 
 	if (!classid)
 		return -1;
@@ -289,18 +290,35 @@ static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
 
 static int __init init_cgroup_cls(void)
 {
-	int ret = register_tcf_proto_ops(&cls_cgroup_ops);
-	if (ret)
-		return ret;
+	int ret;
+
 	ret = cgroup_load_subsys(&net_cls_subsys);
 	if (ret)
-		unregister_tcf_proto_ops(&cls_cgroup_ops);
+		goto out;
+
+#ifndef CONFIG_NET_CLS_CGROUP
+	/* We can't use rcu_assign_pointer because this is an int. */
+	smp_wmb();
+	net_cls_subsys_id = net_cls_subsys.subsys_id;
+#endif
+
+	ret = register_tcf_proto_ops(&cls_cgroup_ops);
+	if (ret)
+		cgroup_unload_subsys(&net_cls_subsys);
+
+out:
 	return ret;
 }
 
 static void __exit exit_cgroup_cls(void)
 {
 	unregister_tcf_proto_ops(&cls_cgroup_ops);
+
+#ifndef CONFIG_NET_CLS_CGROUP
+	net_cls_subsys_id = -1;
+	synchronize_rcu();
+#endif
+
 	cgroup_unload_subsys(&net_cls_subsys);
 }
 
diff --git a/net/socket.c b/net/socket.c
index f9f7d0872cac..367d5477d00f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -94,6 +94,7 @@
 
 #include <net/compat.h>
 #include <net/wext.h>
+#include <net/cls_cgroup.h>
 
 #include <net/sock.h>
 #include <linux/netfilter.h>
@@ -558,6 +559,8 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
 	int err;
 
+	sock_update_classid(sock->sk);
+
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;
@@ -684,6 +687,8 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
 {
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
 
+	sock_update_classid(sock->sk);
+
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;
@@ -777,6 +782,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 	if (unlikely(!sock->ops->splice_read))
 		return -EINVAL;
 
+	sock_update_classid(sock->sk);
+
 	return sock->ops->splice_read(sock, ppos, pipe, len, flags);
 }
 
@@ -3069,6 +3076,8 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
 {
+	sock_update_classid(sock->sk);
+
 	if (sock->ops->sendpage)
 		return sock->ops->sendpage(sock, page, offset, size, flags);
 
-- 
cgit v1.2.3-58-ga151


From 8286274284e15b11b0f531b6ceeef21fbe00a8dd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 24 May 2010 00:14:10 -0700
Subject: tun: Update classid on packet injection

This patch makes tun update its socket classid every time we
inject a packet into the network stack.  This is so that any
updates made by the admin to the process writing packets to
tun is effected.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 2 ++
 net/core/sock.c   | 1 +
 2 files changed, 3 insertions(+)

(limited to 'net')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 97b25533e5fb..8793c2bf7f15 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -526,6 +526,8 @@ static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 	struct sk_buff *skb;
 	int err;
 
+	sock_update_classid(sk);
+
 	/* Under a page?  Don't bother with paged skb. */
 	if (prepad + len < PAGE_SIZE || !linear)
 		linear = len;
diff --git a/net/core/sock.c b/net/core/sock.c
index a05ae7f9771e..37fe9b6adade 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1064,6 +1064,7 @@ void sock_update_classid(struct sock *sk)
 	if (classid && classid != sk->sk_classid)
 		sk->sk_classid = classid;
 }
+EXPORT_SYMBOL(sock_update_classid);
 #endif
 
 /**
-- 
cgit v1.2.3-58-ga151


From a69eee4988752c7196677958b4ed8f4c2b28499a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 24 May 2010 07:45:43 -0700
Subject: Revert "ath9k: Group Key fix for VAPs"

This reverts commit 03ceedea972a82d343fa5c2528b3952fa9e615d5, since it
breaks resume from suspend-to-ram on Rafael's Acer Ferrari One.
NetworkManager thinks everything is ok, but it can't connect to the AP
to get an IP address after the resume.

In fact, it even breaks resume for non-ath9k chipsets: reverting it also
fixes Rafael's Toshiba Protege R500 with the iwlagn driver.  As Johannes
says:

  "Indeed, this patch needs to be reverted. That mac80211 change is wrong
   and completely unnecessary."

Reported-and-requested-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Cc: Daniel Yingqiang Ma <yma.cool@gmail.com>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/net/wireless/ath/ath9k/main.c | 28 +++-------------------------
 include/net/mac80211.h                |  1 -
 net/mac80211/key.c                    |  1 -
 3 files changed, 3 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 893b552981a0..abfa0493236f 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -752,7 +752,6 @@ static int ath_key_config(struct ath_common *common,
 	struct ath_hw *ah = common->ah;
 	struct ath9k_keyval hk;
 	const u8 *mac = NULL;
-	u8 gmac[ETH_ALEN];
 	int ret = 0;
 	int idx;
 
@@ -776,30 +775,9 @@ static int ath_key_config(struct ath_common *common,
 	memcpy(hk.kv_val, key->key, key->keylen);
 
 	if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
-
-		if (key->ap_addr) {
-			/*
-			 * Group keys on hardware that supports multicast frame
-			 * key search use a mac that is the sender's address with
-			 * the high bit set instead of the app-specified address.
-			 */
-			memcpy(gmac, key->ap_addr, ETH_ALEN);
-			gmac[0] |= 0x80;
-			mac = gmac;
-
-			if (key->alg == ALG_TKIP)
-				idx = ath_reserve_key_cache_slot_tkip(common);
-			else
-				idx = ath_reserve_key_cache_slot(common);
-			if (idx < 0)
-				mac = NULL; /* no free key cache entries */
-		}
-
-		if (!mac) {
-			/* For now, use the default keys for broadcast keys. This may
-			 * need to change with virtual interfaces. */
-			idx = key->keyidx;
-		}
+		/* For now, use the default keys for broadcast keys. This may
+		 * need to change with virtual interfaces. */
+		idx = key->keyidx;
 	} else if (key->keyidx) {
 		if (WARN_ON(!sta))
 			return -EOPNOTSUPP;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5be900d19660..e24b0363e6cb 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -831,7 +831,6 @@ struct ieee80211_key_conf {
 	u8 iv_len;
 	u8 hw_key_idx;
 	u8 flags;
-	u8 *ap_addr;
 	s8 keyidx;
 	u8 keylen;
 	u8 key[0];
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8d4b41787dcf..e8f6e3b252d8 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -140,7 +140,6 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 				     struct ieee80211_sub_if_data,
 				     u.ap);
 
-	key->conf.ap_addr = sdata->dev->dev_addr;
 	ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
 
 	if (!ret) {
-- 
cgit v1.2.3-58-ga151


From d9b52dc6fd1fbb2bad645cbc86a60f984c1cb179 Mon Sep 17 00:00:00 2001
From: Yoichi Yuasa <yuasa@linux-mips.org>
Date: Mon, 24 May 2010 18:37:02 -0700
Subject: net/dccp: expansion of error code size

Because MIPS's EDQUOT value is 1133(0x46d).
It's larger than u8.

Signed-off-by: Yoichi Yuasa <yuasa@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 58f7bc156850..6beb6a7d6fba 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -124,9 +124,9 @@ static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
 	return queued;
 }
 
-static u8 dccp_reset_code_convert(const u8 code)
+static u16 dccp_reset_code_convert(const u8 code)
 {
-	const u8 error_code[] = {
+	const u16 error_code[] = {
 	[DCCP_RESET_CODE_CLOSED]	     = 0,	/* normal termination */
 	[DCCP_RESET_CODE_UNSPECIFIED]	     = 0,	/* nothing known */
 	[DCCP_RESET_CODE_ABORTED]	     = ECONNRESET,
@@ -148,7 +148,7 @@ static u8 dccp_reset_code_convert(const u8 code)
 
 static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
 {
-	u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code);
+	u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code);
 
 	sk->sk_err = err;
 
-- 
cgit v1.2.3-58-ga151


From 4be929be34f9bdeffa40d815d32d7d60d2c7f03b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 24 May 2010 14:33:03 -0700
Subject: kernel-wide: replace USHORT_MAX, SHORT_MAX and SHORT_MIN with
 USHRT_MAX, SHRT_MAX and SHRT_MIN

- C99 knows about USHRT_MAX/SHRT_MAX/SHRT_MIN, not
  USHORT_MAX/SHORT_MAX/SHORT_MIN.

- Make SHRT_MIN of type s16, not int, for consistency.

[akpm@linux-foundation.org: fix drivers/dma/timb_dma.c]
[akpm@linux-foundation.org: fix security/keys/keyring.c]
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Acked-by: WANG Cong <xiyou.wangcong@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/dma/timb_dma.c                |  2 +-
 drivers/scsi/fcoe/fcoe.c              |  2 +-
 drivers/scsi/mpt2sas/mpt2sas_base.c   |  2 +-
 drivers/scsi/mpt2sas/mpt2sas_config.c |  2 +-
 drivers/vhost/vhost.c                 |  2 +-
 fs/nfs/super.c                        |  4 ++--
 fs/nfsd/nfsctl.c                      |  4 ++--
 fs/ocfs2/blockcheck.c                 |  4 ++--
 include/linux/kernel.h                |  6 +++---
 include/linux/sched.h                 |  2 +-
 include/net/ip.h                      |  6 +++---
 include/net/ipv6.h                    |  6 +++---
 ipc/msg.c                             | 12 ++++++------
 ipc/util.c                            |  4 ++--
 lib/vsprintf.c                        |  2 +-
 net/9p/protocol.c                     |  2 +-
 net/dccp/options.c                    |  2 +-
 net/ipv4/udp.c                        |  8 ++++----
 net/mac80211/sta_info.c               |  2 +-
 net/sunrpc/rpcb_clnt.c                |  2 +-
 security/keys/keyring.c               |  6 +++---
 21 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index 0172fa3c7a2b..a1bf77c1993f 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -188,7 +188,7 @@ static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single)
 static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
 	struct scatterlist *sg, bool last)
 {
-	if (sg_dma_len(sg) > USHORT_MAX) {
+	if (sg_dma_len(sg) > USHRT_MAX) {
 		dev_err(chan2dev(&td_chan->chan), "Too big sg element\n");
 		return -EINVAL;
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 9276121db1ef..44a07593de56 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -688,7 +688,7 @@ static int fcoe_shost_config(struct fc_lport *lport, struct device *dev)
 	}
 
 	if (!lport->vport)
-		fc_host_max_npiv_vports(lport->host) = USHORT_MAX;
+		fc_host_max_npiv_vports(lport->host) = USHRT_MAX;
 
 	snprintf(fc_host_symbolic_name(lport->host), FC_SYMBOLIC_NAME_SIZE,
 		 "%s v%s over %s", FCOE_NAME, FCOE_VERSION,
diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c
index b830d61684dd..0ec1ed389c20 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_base.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_base.c
@@ -3757,7 +3757,7 @@ _base_reset_handler(struct MPT2SAS_ADAPTER *ioc, int reset_phase)
 		if (ioc->config_cmds.status & MPT2_CMD_PENDING) {
 			ioc->config_cmds.status |= MPT2_CMD_RESET;
 			mpt2sas_base_free_smid(ioc, ioc->config_cmds.smid);
-			ioc->config_cmds.smid = USHORT_MAX;
+			ioc->config_cmds.smid = USHRT_MAX;
 			complete(&ioc->config_cmds.done);
 		}
 		break;
diff --git a/drivers/scsi/mpt2sas/mpt2sas_config.c b/drivers/scsi/mpt2sas/mpt2sas_config.c
index e762dd3e2fcb..c65442982d7b 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_config.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_config.c
@@ -258,7 +258,7 @@ mpt2sas_config_done(struct MPT2SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
 #ifdef CONFIG_SCSI_MPT2SAS_LOGGING
 	_config_display_some_debug(ioc, smid, "config_done", mpi_reply);
 #endif
-	ioc->config_cmds.smid = USHORT_MAX;
+	ioc->config_cmds.smid = USHRT_MAX;
 	complete(&ioc->config_cmds.done);
 	return 1;
 }
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 750effe0f98b..c6fb8e968f21 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -806,7 +806,7 @@ static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
 	count = indirect->len / sizeof desc;
 	/* Buffers are chained via a 16 bit next field, so
 	 * we can have at most 2^16 of these. */
-	if (count > USHORT_MAX + 1) {
+	if (count > USHRT_MAX + 1) {
 		vq_err(vq, "Indirect buffer length too big: %d\n",
 		       indirect->len);
 		return -E2BIG;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2f8b1157daa2..04214fc5c304 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1060,7 +1060,7 @@ static int nfs_parse_mount_options(char *raw,
 				goto out_nomem;
 			rc = strict_strtoul(string, 10, &option);
 			kfree(string);
-			if (rc != 0 || option > USHORT_MAX)
+			if (rc != 0 || option > USHRT_MAX)
 				goto out_invalid_value;
 			mnt->nfs_server.port = option;
 			break;
@@ -1181,7 +1181,7 @@ static int nfs_parse_mount_options(char *raw,
 				goto out_nomem;
 			rc = strict_strtoul(string, 10, &option);
 			kfree(string);
-			if (rc != 0 || option > USHORT_MAX)
+			if (rc != 0 || option > USHRT_MAX)
 				goto out_invalid_value;
 			mnt->mount_server.port = option;
 			break;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index bc3194ea01f5..508941c23af7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -998,7 +998,7 @@ static ssize_t __write_ports_addxprt(char *buf)
 	if (sscanf(buf, "%15s %4u", transport, &port) != 2)
 		return -EINVAL;
 
-	if (port < 1 || port > USHORT_MAX)
+	if (port < 1 || port > USHRT_MAX)
 		return -EINVAL;
 
 	err = nfsd_create_serv();
@@ -1040,7 +1040,7 @@ static ssize_t __write_ports_delxprt(char *buf)
 	if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2)
 		return -EINVAL;
 
-	if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL)
+	if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL)
 		return -EINVAL;
 
 	xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port);
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index b7428c5d0d3b..ec6d12339593 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -403,7 +403,7 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,
 	 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
 	 * larger than 16 bits.
 	 */
-	BUG_ON(ecc > USHORT_MAX);
+	BUG_ON(ecc > USHRT_MAX);
 
 	bc->bc_crc32e = cpu_to_le32(crc);
 	bc->bc_ecc = cpu_to_le16((u16)ecc);
@@ -508,7 +508,7 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
 	 * No ecc'd ocfs2 structure is larger than 4K, so ecc will be no
 	 * larger than 16 bits.
 	 */
-	BUG_ON(ecc > USHORT_MAX);
+	BUG_ON(ecc > USHRT_MAX);
 
 	bc->bc_crc32e = cpu_to_le32(crc);
 	bc->bc_ecc = cpu_to_le16((u16)ecc);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index cc5e3ffe9fce..a2e7c32e17e7 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -24,9 +24,9 @@
 extern const char linux_banner[];
 extern const char linux_proc_banner[];
 
-#define USHORT_MAX	((u16)(~0U))
-#define SHORT_MAX	((s16)(USHORT_MAX>>1))
-#define SHORT_MIN	(-SHORT_MAX - 1)
+#define USHRT_MAX	((u16)(~0U))
+#define SHRT_MAX	((s16)(USHRT_MAX>>1))
+#define SHRT_MIN	((s16)(-SHRT_MAX - 1))
 #define INT_MAX		((int)(~0U>>1))
 #define INT_MIN		(-INT_MAX - 1)
 #define UINT_MAX	(~0U)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 415b8f8a3f45..c0151ffd3541 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -384,7 +384,7 @@ struct user_namespace;
  * 1-3 now and depends on arch. We use "5" as safe margin, here.
  */
 #define MAPCOUNT_ELF_CORE_MARGIN	(5)
-#define DEFAULT_MAX_MAP_COUNT	(USHORT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
+#define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
 
 extern int sysctl_max_map_count;
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 63548f0a44b1..452f229c380a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -358,11 +358,11 @@ enum ip_defrag_users {
 	IP_DEFRAG_LOCAL_DELIVER,
 	IP_DEFRAG_CALL_RA_CHAIN,
 	IP_DEFRAG_CONNTRACK_IN,
-	__IP_DEFRAG_CONNTRACK_IN_END	= IP_DEFRAG_CONNTRACK_IN + USHORT_MAX,
+	__IP_DEFRAG_CONNTRACK_IN_END	= IP_DEFRAG_CONNTRACK_IN + USHRT_MAX,
 	IP_DEFRAG_CONNTRACK_OUT,
-	__IP_DEFRAG_CONNTRACK_OUT_END	= IP_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
+	__IP_DEFRAG_CONNTRACK_OUT_END	= IP_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
 	IP_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
+	__IP_DEFRAG_CONNTRACK_BRIDGE_IN = IP_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
 	IP_DEFRAG_VS_IN,
 	IP_DEFRAG_VS_OUT,
 	IP_DEFRAG_VS_FWD
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index eba5cc00325a..2600b69757b8 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -354,11 +354,11 @@ struct inet_frag_queue;
 enum ip6_defrag_users {
 	IP6_DEFRAG_LOCAL_DELIVER,
 	IP6_DEFRAG_CONNTRACK_IN,
-	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHORT_MAX,
+	__IP6_DEFRAG_CONNTRACK_IN	= IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX,
 	IP6_DEFRAG_CONNTRACK_OUT,
-	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHORT_MAX,
+	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
 	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
-	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHORT_MAX,
+	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
 };
 
 struct ip6_create_arg {
diff --git a/ipc/msg.c b/ipc/msg.c
index 9547cb7ac313..747b65507a91 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -345,19 +345,19 @@ copy_msqid_to_user(void __user *buf, struct msqid64_ds *in, int version)
 		out.msg_rtime		= in->msg_rtime;
 		out.msg_ctime		= in->msg_ctime;
 
-		if (in->msg_cbytes > USHORT_MAX)
-			out.msg_cbytes	= USHORT_MAX;
+		if (in->msg_cbytes > USHRT_MAX)
+			out.msg_cbytes	= USHRT_MAX;
 		else
 			out.msg_cbytes	= in->msg_cbytes;
 		out.msg_lcbytes		= in->msg_cbytes;
 
-		if (in->msg_qnum > USHORT_MAX)
-			out.msg_qnum	= USHORT_MAX;
+		if (in->msg_qnum > USHRT_MAX)
+			out.msg_qnum	= USHRT_MAX;
 		else
 			out.msg_qnum	= in->msg_qnum;
 
-		if (in->msg_qbytes > USHORT_MAX)
-			out.msg_qbytes	= USHORT_MAX;
+		if (in->msg_qbytes > USHRT_MAX)
+			out.msg_qbytes	= USHRT_MAX;
 		else
 			out.msg_qbytes	= in->msg_qbytes;
 		out.msg_lqbytes		= in->msg_qbytes;
diff --git a/ipc/util.c b/ipc/util.c
index 79ce84e890f7..69a0cc13d966 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -124,8 +124,8 @@ void ipc_init_ids(struct ipc_ids *ids)
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
-		if (seq_limit > USHORT_MAX)
-			ids->seq_max = USHORT_MAX;
+		if (seq_limit > USHRT_MAX)
+			ids->seq_max = USHRT_MAX;
 		 else
 		 	ids->seq_max = seq_limit;
 	}
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 46d34b0b74a8..20c95121d8a1 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1980,7 +1980,7 @@ int vsscanf(const char *buf, const char *fmt, va_list args)
 		{
 			char *s = (char *)va_arg(args, char *);
 			if (field_width == -1)
-				field_width = SHORT_MAX;
+				field_width = SHRT_MAX;
 			/* first, skip leading white space in buffer */
 			str = skip_spaces(str);
 
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 77d3aab4036b..149f82160130 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -394,7 +394,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
 				const char *sptr = va_arg(ap, const char *);
 				int16_t len = 0;
 				if (sptr)
-					len = MIN(strlen(sptr), USHORT_MAX);
+					len = MIN(strlen(sptr), USHRT_MAX);
 
 				errcode = p9pdu_writef(pdu, proto_version,
 								"w", len);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 1b08cae9c65b..07395f861d35 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -296,7 +296,7 @@ static inline u8 dccp_ndp_len(const u64 ndp)
 {
 	if (likely(ndp <= 0xFF))
 		return 1;
-	return likely(ndp <= USHORT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
+	return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
 }
 
 int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..baeec29fe0f1 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1686,8 +1686,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Illegal coverage: use default (8) */
 			val = 8;
-		else if (val > USHORT_MAX)
-			val = USHORT_MAX;
+		else if (val > USHRT_MAX)
+			val = USHRT_MAX;
 		up->pcslen = val;
 		up->pcflag |= UDPLITE_SEND_CC;
 		break;
@@ -1700,8 +1700,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 			return -ENOPROTOOPT;
 		if (val != 0 && val < 8) /* Avoid silly minimal values.       */
 			val = 8;
-		else if (val > USHORT_MAX)
-			val = USHORT_MAX;
+		else if (val > USHRT_MAX)
+			val = USHRT_MAX;
 		up->pcrlen = val;
 		up->pcflag |= UDPLITE_RECV_CC;
 		break;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 730197591ab5..ba9360a475b0 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -259,7 +259,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	skb_queue_head_init(&sta->tx_filtered);
 
 	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
-		sta->last_seq_ctrl[i] = cpu_to_le16(USHORT_MAX);
+		sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 	printk(KERN_DEBUG "%s: Allocated STA %pM\n",
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 121105355f60..dac219a56ae1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -783,7 +783,7 @@ static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p,
 	port = ntohl(*p);
 	dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid,
 			task->tk_msg.rpc_proc->p_name, port);
-	if (unlikely(port > USHORT_MAX))
+	if (unlikely(port > USHRT_MAX))
 		return -EIO;
 
 	rpcb->r_port = port;
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index ef03a82a0135..d37f713e73ce 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -669,7 +669,7 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
 	struct keyring_list *klist =
 		container_of(rcu, struct keyring_list, rcu);
 
-	if (klist->delkey != USHORT_MAX)
+	if (klist->delkey != USHRT_MAX)
 		key_put(klist->keys[klist->delkey]);
 	kfree(klist);
 }
@@ -746,7 +746,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 			max += klist->maxkeys;
 
 		ret = -ENFILE;
-		if (max > USHORT_MAX - 1)
+		if (max > USHRT_MAX - 1)
 			goto error_quota;
 		size = sizeof(*klist) + sizeof(struct key *) * max;
 		if (size > PAGE_SIZE)
@@ -763,7 +763,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type,
 			       sizeof(struct key *) * klist->nkeys);
 			nklist->delkey = klist->nkeys;
 			nklist->nkeys = klist->nkeys + 1;
-			klist->delkey = USHORT_MAX;
+			klist->delkey = USHRT_MAX;
 		} else {
 			nklist->nkeys = 1;
 			nklist->delkey = 0;
-- 
cgit v1.2.3-58-ga151


From ef7ffe8f06895312aeb08a5f8a1d4c90e34335ea Mon Sep 17 00:00:00 2001
From: Alex Riesen <raa.lkml@gmail.com>
Date: Mon, 24 May 2010 14:33:05 -0700
Subject: sunrpc: use formatting of module name in SUNRPC

gcc-4.3.3 produces the warning:
  "format not a string literal and no format arguments"

Signed-off-by: Alex Riesen <raa.lkml@gmail.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Chuck Lever <cel@citi.umich.edu>
Cc: David S. Miller <davem@davemloft.net>
Acked-by: Tom Talpey <tmtalpey@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/sunrpc/xprt.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3fc325399ee4..dcd0132396ba 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -166,7 +166,6 @@ EXPORT_SYMBOL_GPL(xprt_unregister_transport);
 int xprt_load_transport(const char *transport_name)
 {
 	struct xprt_class *t;
-	char module_name[sizeof t->name + 5];
 	int result;
 
 	result = 0;
@@ -178,9 +177,7 @@ int xprt_load_transport(const char *transport_name)
 		}
 	}
 	spin_unlock(&xprt_list_lock);
-	strcpy(module_name, "xprt");
-	strncat(module_name, transport_name, sizeof t->name);
-	result = request_module(module_name);
+	result = request_module("xprt%s", transport_name);
 out:
 	return result;
 }
-- 
cgit v1.2.3-58-ga151


From e513480e28cdfd868755f05c1a654fcfcee58070 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Tue, 25 May 2010 16:08:39 -0700
Subject: Phonet: fix potential use-after-free in pep_sock_close()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

sk_common_release() might destroy our last reference to the socket.
So an extra temporary reference is needed during cleanup.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index af4d38bc3b22..7b048a35ca58 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -626,6 +626,7 @@ static void pep_sock_close(struct sock *sk, long timeout)
 	struct pep_sock *pn = pep_sk(sk);
 	int ifindex = 0;
 
+	sock_hold(sk); /* keep a reference after sk_common_release() */
 	sk_common_release(sk);
 
 	lock_sock(sk);
@@ -644,6 +645,7 @@ static void pep_sock_close(struct sock *sk, long timeout)
 
 	if (ifindex)
 		gprs_detach(sk);
+	sock_put(sk);
 }
 
 static int pep_wait_connreq(struct sock *sk, int noblock)
-- 
cgit v1.2.3-58-ga151


From ed0f160ad674407adb3aba499444f71c83289c63 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 26 May 2010 00:38:56 -0700
Subject: ipmr: off by one in __ipmr_fill_mroute()

This fixes a smatch warning:
	net/ipv4/ipmr.c +1917 __ipmr_fill_mroute(12) error: buffer overflow
	'(mrt)->vif_table' 32 <= 32

The ipv6 version had the same issue.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c  | 2 +-
 net/ipv6/ip6mr.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 45889103b3e2..856123fe32f9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1911,7 +1911,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 	struct rtattr *mp_head;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->mfc_parent > MAXVIFS)
+	if (c->mfc_parent >= MAXVIFS)
 		return -ENOENT;
 
 	if (VIF_EXISTS(mrt, c->mfc_parent))
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index bd9e7d3e9c8e..073071f2b75b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2017,7 +2017,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 	struct rtattr *mp_head;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
-	if (c->mf6c_parent > MAXMIFS)
+	if (c->mf6c_parent >= MAXMIFS)
 		return -ENOENT;
 
 	if (MIF_EXISTS(mrt, c->mf6c_parent))
-- 
cgit v1.2.3-58-ga151


From 0a68b0bed08eeb7ec62e0125f17856b1ccb1ea4b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 26 May 2010 08:42:24 -0400
Subject: sunrpc: fix leak on error on socket xprt setup

Also collect exit code together while we're at it.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b7cd8cccbe72..2a9675136c68 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2293,6 +2293,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
+	struct rpc_xprt *ret;
 
 	xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -2330,8 +2331,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 		xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
 		break;
 	default:
-		kfree(xprt);
-		return ERR_PTR(-EAFNOSUPPORT);
+		ret = ERR_PTR(-EAFNOSUPPORT);
+		goto out_err;
 	}
 
 	if (xprt_bound(xprt))
@@ -2346,10 +2347,11 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 	if (try_module_get(THIS_MODULE))
 		return xprt;
-
+	ret = ERR_PTR(-EINVAL);
+out_err:
 	kfree(xprt->slot);
 	kfree(xprt);
-	return ERR_PTR(-EINVAL);
+	return ret;
 }
 
 static const struct rpc_timeout xs_tcp_default_timeout = {
@@ -2368,6 +2370,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
+	struct rpc_xprt *ret;
 
 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -2403,8 +2406,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 		xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
 		break;
 	default:
-		kfree(xprt);
-		return ERR_PTR(-EAFNOSUPPORT);
+		ret = ERR_PTR(-EAFNOSUPPORT);
+		goto out_err;
 	}
 
 	if (xprt_bound(xprt))
@@ -2420,10 +2423,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 
 	if (try_module_get(THIS_MODULE))
 		return xprt;
-
+	ret = ERR_PTR(-EINVAL);
+out_err:
 	kfree(xprt->slot);
 	kfree(xprt);
-	return ERR_PTR(-EINVAL);
+	return ret;
 }
 
 /**
@@ -2437,6 +2441,7 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
 	struct svc_sock *bc_sock;
+	struct rpc_xprt *ret;
 
 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -2476,8 +2481,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 				   RPCBIND_NETID_TCP6);
 		break;
 	default:
-		kfree(xprt);
-		return ERR_PTR(-EAFNOSUPPORT);
+		ret = ERR_PTR(-EAFNOSUPPORT);
+		goto out_err;
 	}
 
 	if (xprt_bound(xprt))
@@ -2499,9 +2504,11 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 
 	if (try_module_get(THIS_MODULE))
 		return xprt;
+	ret = ERR_PTR(-EINVAL);
+out_err:
 	kfree(xprt->slot);
 	kfree(xprt);
-	return ERR_PTR(-EINVAL);
+	return ret;
 }
 
 static struct xprt_class	xs_udp_transport = {
-- 
cgit v1.2.3-58-ga151


From a56635a56f2afb3d22d9ce07e8f8d69537416b2d Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 26 May 2010 05:56:48 +0000
Subject: net/iucv: Add missing spin_unlock

Add a spin_unlock missing on the error path.  There seems like no reason
why the lock should continue to be held if the kzalloc fail.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression E1;
@@

* spin_lock(E1,...);
  <+... when != E1
  if (...) {
    ... when != E1
*   return ...;
  }
  ...+>
* spin_unlock(E1,...);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c8b4599a752e..9637e45744fa 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1619,7 +1619,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg)
 save_message:
 	save_msg = kzalloc(sizeof(struct sock_msg_q), GFP_ATOMIC | GFP_DMA);
 	if (!save_msg)
-		return;
+		goto out_unlock;
 	save_msg->path = path;
 	save_msg->msg = *msg;
 
-- 
cgit v1.2.3-58-ga151


From 8a74ad60a546b13bd1096b2a61a7a5c6fd9ae17c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 26 May 2010 19:20:18 +0000
Subject: net: fix lock_sock_bh/unlock_sock_bh

This new sock lock primitive was introduced to speedup some user context
socket manipulation. But it is unsafe to protect two threads, one using
regular lock_sock/release_sock, one using lock_sock_bh/unlock_sock_bh

This patch changes lock_sock_bh to be careful against 'owned' state.
If owned is found to be set, we must take the slow path.
lock_sock_bh() now returns a boolean to say if the slow path was taken,
and this boolean is used at unlock_sock_bh time to call the appropriate
unlock function.

After this change, BH are either disabled or enabled during the
lock_sock_bh/unlock_sock_bh protected section. This might be misleading,
so we rename these functions to lock_sock_fast()/unlock_sock_fast().

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h  | 20 ++++++++++++++------
 net/core/datagram.c |  6 ++++--
 net/core/sock.c     | 33 +++++++++++++++++++++++++++++++++
 net/ipv4/udp.c      | 14 ++++++++------
 net/ipv6/udp.c      |  5 +++--
 5 files changed, 62 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index d2a71b04a5ae..ca241ea14875 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1026,15 +1026,23 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
-static inline void lock_sock_bh(struct sock *sk)
+extern bool lock_sock_fast(struct sock *sk);
+/**
+ * unlock_sock_fast - complement of lock_sock_fast
+ * @sk: socket
+ * @slow: slow mode
+ *
+ * fast unlock socket for user context.
+ * If slow mode is on, we call regular release_sock()
+ */
+static inline void unlock_sock_fast(struct sock *sk, bool slow)
 {
-	spin_lock_bh(&sk->sk_lock.slock);
+	if (slow)
+		release_sock(sk);
+	else
+		spin_unlock_bh(&sk->sk_lock.slock);
 }
 
-static inline void unlock_sock_bh(struct sock *sk)
-{
-	spin_unlock_bh(&sk->sk_lock.slock);
-}
 
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e0097531417a..f5b6f43a4c2e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,15 +229,17 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	bool slow;
+
 	if (likely(atomic_read(&skb->users) == 1))
 		smp_rmb();
 	else if (likely(!atomic_dec_and_test(&skb->users)))
 		return;
 
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	/* skb is now orphaned, can be freed outside of locked section */
 	__kfree_skb(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 37fe9b6adade..2cf7f9f7e775 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2007,6 +2007,39 @@ void release_sock(struct sock *sk)
 }
 EXPORT_SYMBOL(release_sock);
 
+/**
+ * lock_sock_fast - fast version of lock_sock
+ * @sk: socket
+ *
+ * This version should be used for very small section, where process wont block
+ * return false if fast path is taken
+ *   sk_lock.slock locked, owned = 0, BH disabled
+ * return true if slow path is taken
+ *   sk_lock.slock unlocked, owned = 1, BH enabled
+ */
+bool lock_sock_fast(struct sock *sk)
+{
+	might_sleep();
+	spin_lock_bh(&sk->sk_lock.slock);
+
+	if (!sk->sk_lock.owned)
+		/*
+		 * Note : We must disable BH
+		 */
+		return false;
+
+	__lock_sock(sk);
+	sk->sk_lock.owned = 1;
+	spin_unlock(&sk->sk_lock.slock);
+	/*
+	 * The sk_lock has mutex_lock() semantics here:
+	 */
+	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+	local_bh_enable();
+	return true;
+}
+EXPORT_SYMBOL(lock_sock_fast);
+
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct timeval tv;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9de6a698f91d..b9d0d409516f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1063,10 +1063,11 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
-		lock_sock_bh(sk);
+		bool slow = lock_sock_fast(sk);
+
 		__skb_queue_purge(&list_kill);
 		sk_mem_reclaim_partial(sk);
-		unlock_sock_bh(sk);
+		unlock_sock_fast(sk, slow);
 	}
 	return res;
 }
@@ -1123,6 +1124,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	int peeked;
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
+	bool slow;
 
 	/*
 	 *	Check any passed addresses
@@ -1197,10 +1199,10 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1625,9 +1627,9 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
-	lock_sock_bh(sk);
+	bool slow = lock_sock_fast(sk);
 	udp_flush_pending_frames(sk);
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3d7a2c0b836a..87be58673b55 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	int err;
 	int is_udplite = IS_UDPLITE(sk);
 	int is_udp4;
+	bool slow;
 
 	if (addr_len)
 		*addr_len=sizeof(struct sockaddr_in6);
@@ -424,7 +425,7 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock_bh(sk);
+	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +434,7 @@ csum_copy_err:
 			UDP6_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
 	}
-	unlock_sock_bh(sk);
+	unlock_sock_fast(sk, slow);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
-- 
cgit v1.2.3-58-ga151


From 92e99a98bb44ccce6c902ea077b49cafcfb80025 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 26 May 2010 14:43:33 -0700
Subject: iucv: convert cpu notifier to return encapsulate errno value

By the previous modification, the cpu notifier can return encapsulate
errno value.  This converts the cpu notifiers for iucv.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Ursula Braun <ursula.braun@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 net/iucv/iucv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index fd8b28361a64..f28ad2cc8428 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -632,13 +632,14 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 		iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data),
 					GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 		if (!iucv_irq_data[cpu])
-			return NOTIFY_BAD;
+			return notifier_from_errno(-ENOMEM);
+
 		iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param),
 				     GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
 		if (!iucv_param[cpu]) {
 			kfree(iucv_irq_data[cpu]);
 			iucv_irq_data[cpu] = NULL;
-			return NOTIFY_BAD;
+			return notifier_from_errno(-ENOMEM);
 		}
 		iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param),
 					GFP_KERNEL|GFP_DMA, cpu_to_node(cpu));
@@ -647,7 +648,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 			iucv_param[cpu] = NULL;
 			kfree(iucv_irq_data[cpu]);
 			iucv_irq_data[cpu] = NULL;
-			return NOTIFY_BAD;
+			return notifier_from_errno(-ENOMEM);
 		}
 		break;
 	case CPU_UP_CANCELED:
@@ -677,7 +678,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self,
 		cpu_clear(cpu, cpumask);
 		if (cpus_empty(cpumask))
 			/* Can't offline last IUCV enabled cpu. */
-			return NOTIFY_BAD;
+			return notifier_from_errno(-EINVAL);
 		smp_call_function_single(cpu, iucv_retrieve_cpu, NULL, 1);
 		if (cpus_empty(iucv_irq_cpumask))
 			smp_call_function_single(first_cpu(iucv_buffer_cpumask),
-- 
cgit v1.2.3-58-ga151


From a47311380e094bb201be8a818370c73c3f52122c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 27 May 2010 16:09:39 -0700
Subject: net: fix __neigh_event_send()

commit 7fee226ad23 (net: add a noref bit on skb dst) missed one spot
where an skb is enqueued, with a possibly not refcounted dst entry.

__neigh_event_send() inserts skb into arp_queue, so we must make sure
dst entry is refcounted, or dst entry can be freed by garbage collector
after caller exits from rcu protected section.

Reported-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index bff37908bd55..6ba1c0eece03 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -934,6 +934,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
+			skb_dst_force(skb);
 			__skb_queue_tail(&neigh->arp_queue, skb);
 		}
 		rc = 1;
-- 
cgit v1.2.3-58-ga151


From 0aa68271510ae2b221d4b60892103837be63afe4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 27 May 2010 16:14:30 -0700
Subject: ipv6: Add GSO support on forwarding path

Currently we disallow GSO packets on the IPv6 forward path.
This patch fixes this.

Note that I discovered that our existing GSO MTU checks (e.g.,
IPv4 forwarding) are buggy in that they skip the check altogether,
when they really should be checking gso_size + header instead.

I have also been lazy here in that I haven't bothered to segment
the GSO packet by hand before generating an ICMP message.  Someone
should add that to be 100% correct.

Reported-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index cd963f64e27c..89425af0684c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -507,7 +507,7 @@ int ip6_forward(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (skb->len > mtu) {
+	if (skb->len > mtu && !skb_is_gso(skb)) {
 		/* Again, force OUTPUT device used as source address */
 		skb->dev = dst->dev;
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-- 
cgit v1.2.3-58-ga151


From 50636af715ac1ceb1872bd29a4bdcc68975c3263 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 28 May 2010 03:41:17 -0700
Subject: xt_tee: use skb_dst_drop()

After commit 7fee226a (net: add a noref bit on skb dst), its wrong to
use : dst_release(skb_dst(skb)), since we could decrement a refcount
while skb dst was not refcounted.

We should use skb_dst_drop(skb) instead.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/xt_TEE.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index d7920d9f49e9..859d9fd429c8 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -76,7 +76,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	if (ip_route_output_key(net, &rt, &fl) != 0)
 		return false;
 
-	dst_release(skb_dst(skb));
+	skb_dst_drop(skb);
 	skb_dst_set(skb, &rt->u.dst);
 	skb->dev      = rt->u.dst.dev;
 	skb->protocol = htons(ETH_P_IP);
@@ -157,7 +157,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	if (dst == NULL)
 		return false;
 
-	dst_release(skb_dst(skb));
+	skb_dst_drop(skb);
 	skb_dst_set(skb, dst);
 	skb->dev      = dst->dev;
 	skb->protocol = htons(ETH_P_IPV6);
-- 
cgit v1.2.3-58-ga151


From 8ca9418350eccd5dd2659931807c1901224dd638 Mon Sep 17 00:00:00 2001
From: Scott Feldman <scofeldm@cisco.com>
Date: Fri, 28 May 2010 03:42:18 -0700
Subject: netlink: bug fix: don't overrun skbs on vf_port dump

Noticed by Patrick McHardy: was continuing to fill skb after a
nla_put_failure, ignoring the size calculated by upper layer.  Now,
return -EMSGSIZE on any overruns, but also allow netdev to
fail ndo_get_vf_port with error other than -EMSGSIZE, thus unwinding
nest.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7ab86f3a1ea4..7331bb2f6b9c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -722,14 +722,13 @@ static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
 
 	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
 		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
-		if (!vf_port) {
-			nla_nest_cancel(skb, vf_ports);
-			return -EMSGSIZE;
-		}
+		if (!vf_port)
+			goto nla_put_failure;
 		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
 		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err == -EMSGSIZE)
+			goto nla_put_failure;
 		if (err) {
-nla_put_failure:
 			nla_nest_cancel(skb, vf_port);
 			continue;
 		}
@@ -739,6 +738,10 @@ nla_put_failure:
 	nla_nest_end(skb, vf_ports);
 
 	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, vf_ports);
+	return -EMSGSIZE;
 }
 
 static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
@@ -753,7 +756,7 @@ static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
 	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
 	if (err) {
 		nla_nest_cancel(skb, port_self);
-		return err;
+		return (err == -EMSGSIZE) ? err : 0;
 	}
 
 	nla_nest_end(skb, port_self);
-- 
cgit v1.2.3-58-ga151


From 045de01a174d9f0734f657eb4b3313d89b4fd5ad Mon Sep 17 00:00:00 2001
From: Scott Feldman <scofeldm@cisco.com>
Date: Fri, 28 May 2010 03:42:43 -0700
Subject: netlink: bug fix: wrong size was calculated for vfinfo list blob

The wrong size was being calculated for vfinfo.  In one case, it was over-
calculating using nlmsg_total_size on attrs, in another case, it was
under-calculating by assuming ifla_vf_* structs are packed together, but
each struct is it's own attr w/ hdr (and padding).

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7331bb2f6b9c..1a2af24e9e3d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -650,11 +650,12 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
 
 		int num_vfs = dev_num_vf(dev->dev.parent);
-		size_t size = nlmsg_total_size(sizeof(struct nlattr));
-		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
-		size += num_vfs * (sizeof(struct ifla_vf_mac) +
-				  sizeof(struct ifla_vf_vlan) +
-				  sizeof(struct ifla_vf_tx_rate));
+		size_t size = nla_total_size(sizeof(struct nlattr));
+		size += nla_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs *
+			(nla_total_size(sizeof(struct ifla_vf_mac)) +
+			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(sizeof(struct ifla_vf_tx_rate)));
 		return size;
 	} else
 		return 0;
-- 
cgit v1.2.3-58-ga151


From 6057fd78a8dcce6269f029b967051d5a2e9b0895 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 28 May 2010 23:02:35 -0700
Subject: IPv6: fix Mobile IPv6 regression

Commit f4f914b5 (net: ipv6 bind to device issue) caused
a regression with Mobile IPv6 when it changed the meaning
of fl->oif to become a strict requirement of the route
lookup.  Instead, only force strict mode when
sk->sk_bound_dev_if is set on the calling socket, getting
the intended behavior and fixing the regression.

Tested-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 294cbe8b0725..252d76199c41 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -814,7 +814,7 @@ struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
 {
 	int flags = 0;
 
-	if (fl->oif || rt6_need_strict(&fl->fl6_dst))
+	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
 		flags |= RT6_LOOKUP_F_IFACE;
 
 	if (!ipv6_addr_any(&fl->fl6_src))
-- 
cgit v1.2.3-58-ga151


From 5b0daa3474d52bed906c4d5e92b44e10148c6972 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 29 May 2010 00:12:13 -0700
Subject: skb: make skb_recycle_check() return a bool value

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 net/core/skbuff.c      | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7cdfb4d52847..bf243fc54959 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -501,7 +501,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, 1, -1);
 }
 
-extern int skb_recycle_check(struct sk_buff *skb, int skb_size);
+extern bool skb_recycle_check(struct sk_buff *skb, int skb_size);
 
 extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4c11000a96aa..4667d4d7c162 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -482,22 +482,22 @@ EXPORT_SYMBOL(consume_skb);
  *	reference count dropping and cleans up the skbuff as if it
  *	just came from __alloc_skb().
  */
-int skb_recycle_check(struct sk_buff *skb, int skb_size)
+bool skb_recycle_check(struct sk_buff *skb, int skb_size)
 {
 	struct skb_shared_info *shinfo;
 
 	if (irqs_disabled())
-		return 0;
+		return false;
 
 	if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
-		return 0;
+		return false;
 
 	skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD);
 	if (skb_end_pointer(skb) - skb->head < skb_size)
-		return 0;
+		return false;
 
 	if (skb_shared(skb) || skb_cloned(skb))
-		return 0;
+		return false;
 
 	skb_release_head_state(skb);
 
@@ -509,7 +509,7 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 	skb->data = skb->head + NET_SKB_PAD;
 	skb_reset_tail_pointer(skb);
 
-	return 1;
+	return true;
 }
 EXPORT_SYMBOL(skb_recycle_check);
 
-- 
cgit v1.2.3-58-ga151


From 5daf47bb4e708fde32c1856a0d049e3c3d03c36c Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Wed, 26 May 2010 05:54:21 +0000
Subject: net/rds: Add missing mutex_unlock

Add a mutex_unlock missing on the error path.  In each case, whenever the
label out is reached from elsewhere in the function, mutex is not locked.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression E1;
@@

* mutex_lock(E1);
  <+... when != E1
  if (...) {
    ... when != E1
*   return ...;
  }
  ...+>
* mutex_unlock(E1);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Reviewed-by: Zach Brown <zach.brown@oracle.com>
Acked-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_cm.c | 1 +
 net/rds/iw_cm.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 10ed0d55f759..f68832798db2 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -475,6 +475,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 	err = rds_ib_setup_qp(conn);
 	if (err) {
 		rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
+		mutex_unlock(&conn->c_cm_lock);
 		goto out;
 	}
 
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a9d951b4fbae..b5dd6ac39be8 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -452,6 +452,7 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
 	err = rds_iw_setup_qp(conn);
 	if (err) {
 		rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
+		mutex_unlock(&conn->c_cm_lock);
 		goto out;
 	}
 
-- 
cgit v1.2.3-58-ga151


From 97dc875f90a7b88a9fa476c256345c0d40fcdf6c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 26 May 2010 05:16:48 +0000
Subject: caif: unlock on error path in cfserl_receive()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There was an spin_unlock missing on the error path.  The spin_lock was
tucked in with the declarations so it was hard to spot.  I added a new
line.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Sjur Brændeland <sjurbren@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfserl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index cb4325a3dc83..965c5baace40 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -59,16 +59,18 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
 	u8 stx = CFSERL_STX;
 	int ret;
 	u16 expectlen = 0;
+
 	caif_assert(newpkt != NULL);
 	spin_lock(&layr->sync);
 
 	if (layr->incomplete_frm != NULL) {
-
 		layr->incomplete_frm =
 		    cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
 		pkt = layr->incomplete_frm;
-		if (pkt == NULL)
+		if (pkt == NULL) {
+			spin_unlock(&layr->sync);
 			return -ENOMEM;
+		}
 	} else {
 		pkt = newpkt;
 	}
-- 
cgit v1.2.3-58-ga151


From 7dfde179c38056b91d51e60f3d50902387f27c84 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Wed, 26 May 2010 00:44:44 +0000
Subject: Phonet: listening socket lock protects the connected socket list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The accept()'d socket need to be unhashed while the (listen()'ing)
socket lock is held. This fixes a race condition that could lead to an
OOPS.

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 7b048a35ca58..94d72e85a475 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1045,12 +1045,12 @@ static void pep_sock_unhash(struct sock *sk)
 	lock_sock(sk);
 	if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
 		skparent = pn->listener;
-		sk_del_node_init(sk);
 		release_sock(sk);
 
-		sk = skparent;
 		pn = pep_sk(skparent);
-		lock_sock(sk);
+		lock_sock(skparent);
+		sk_del_node_init(sk);
+		sk = skparent;
 	}
 	/* Unhash a listening sock only when it is closed
 	 * and all of its active connected pipes are closed. */
-- 
cgit v1.2.3-58-ga151


From 2903037400a26e7c0cc93ab75a7d62abfacdf485 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 29 May 2010 00:20:48 -0700
Subject: net: fix sk_forward_alloc corruptions

As David found out, sock_queue_err_skb() should be called with socket
lock hold, or we risk sk_forward_alloc corruption, since we use non
atomic operations to update this field.

This patch adds bh_lock_sock()/bh_unlock_sock() pair to three spots.
(BH already disabled)

1) skb_tstamp_tx()
2) Before calling ip_icmp_error(), in __udp4_lib_err()
3) Before calling ipv6_icmp_error(), in __udp6_lib_err()

Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 4 ++++
 net/ipv4/udp.c    | 2 ++
 net/ipv6/udp.c    | 6 ++++--
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4667d4d7c162..f2913ae2b52c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2992,7 +2992,11 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	memset(serr, 0, sizeof(*serr));
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+
+	bh_lock_sock(sk);
 	err = sock_queue_err_skb(sk, skb);
+	bh_unlock_sock(sk);
+
 	if (err)
 		kfree_skb(skb);
 }
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b9d0d409516f..acdc9be833cc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -634,7 +634,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
 	} else {
+		bh_lock_sock(sk);
 		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
+		bh_unlock_sock(sk);
 	}
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 87be58673b55..3048f906c042 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -466,9 +466,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
 		goto out;
 
-	if (np->recverr)
+	if (np->recverr) {
+		bh_lock_sock(sk);
 		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
-
+		bh_unlock_sock(sk);
+	}
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
-- 
cgit v1.2.3-58-ga151


From 27f39c73e63833b4c081a0d681d88b4184a0491d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 19 May 2010 22:07:23 +0000
Subject: net: Use __this_cpu_inc() in fast path

This patch saves 224 bytes of text on my machine.

__this_cpu_inc() generates a single instruction, using no scratch
registers :

  65 ff 04 25 a8 30 01 00      incl   %gs:0x130a8

instead of :

  48 c7 c2 80 30 01 00         mov    $0x13080,%rdx
  65 48 8b 04 25 88 ea 00 00   mov    %gs:0xea88,%rax
  83 44 10 28 01               addl   $0x1,0x28(%rax,%rdx,1)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c   | 3 +--
 net/ipv4/route.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1845b08c624e..7d76b056aa3d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2822,8 +2822,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 			skb->dev = master;
 	}
 
-	__get_cpu_var(softnet_data).processed++;
-
+	__this_cpu_inc(softnet_data.processed);
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
 	skb->mac_len = skb->network_header - skb->mac_header;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 560acc677ce4..8495bceec764 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -253,8 +253,7 @@ static unsigned			rt_hash_mask __read_mostly;
 static unsigned int		rt_hash_log  __read_mostly;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
-#define RT_CACHE_STAT_INC(field) \
-	(__raw_get_cpu_var(rt_cache_stat).field++)
+#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
 
 static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
 				   int genid)
-- 
cgit v1.2.3-58-ga151


From 15e83ed78864d0625e87a85f09b297c0919a4797 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 19 May 2010 23:16:03 +0000
Subject: net: remove zap_completion_queue

netpoll does an interesting work in zap_completion_queue(), but this was
before we did skb orphaning before delivering packets to device.

It now makes sense to add a test in dev_kfree_skb_irq() to not queue a
skb if already orphaned, and to remove netpoll zap_completion_queue() as
a bonus.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c     |  4 +++-
 net/core/netpoll.c | 31 -------------------------------
 2 files changed, 3 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7d76b056aa3d..983a3c1d65c4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1577,7 +1577,9 @@ EXPORT_SYMBOL(__netif_schedule);
 
 void dev_kfree_skb_irq(struct sk_buff *skb)
 {
-	if (atomic_dec_and_test(&skb->users)) {
+	if (!skb->destructor)
+		dev_kfree_skb(skb);
+	else if (atomic_dec_and_test(&skb->users)) {
 		struct softnet_data *sd;
 		unsigned long flags;
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 94825b109551..e034342c819c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -49,7 +49,6 @@ static atomic_t trapped;
 		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
 				sizeof(struct iphdr) + sizeof(struct ethhdr))
 
-static void zap_completion_queue(void);
 static void arp_reply(struct sk_buff *skb);
 
 static unsigned int carrier_timeout = 4;
@@ -197,7 +196,6 @@ void netpoll_poll_dev(struct net_device *dev)
 
 	service_arp_queue(dev->npinfo);
 
-	zap_completion_queue();
 }
 
 void netpoll_poll(struct netpoll *np)
@@ -221,40 +219,11 @@ static void refill_skbs(void)
 	spin_unlock_irqrestore(&skb_pool.lock, flags);
 }
 
-static void zap_completion_queue(void)
-{
-	unsigned long flags;
-	struct softnet_data *sd = &get_cpu_var(softnet_data);
-
-	if (sd->completion_queue) {
-		struct sk_buff *clist;
-
-		local_irq_save(flags);
-		clist = sd->completion_queue;
-		sd->completion_queue = NULL;
-		local_irq_restore(flags);
-
-		while (clist != NULL) {
-			struct sk_buff *skb = clist;
-			clist = clist->next;
-			if (skb->destructor) {
-				atomic_inc(&skb->users);
-				dev_kfree_skb_any(skb); /* put this one back */
-			} else {
-				__kfree_skb(skb);
-			}
-		}
-	}
-
-	put_cpu_var(softnet_data);
-}
-
 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
 {
 	int count = 0;
 	struct sk_buff *skb;
 
-	zap_completion_queue();
 	refill_skbs();
 repeat:
 
-- 
cgit v1.2.3-58-ga151


From d7fd1b5747fff3bde92777bcaf705d98ae6f8b6f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 17 May 2010 20:40:51 +0000
Subject: tcp: tcp_md5_hash_skb_data() frag_list handling

tcp_md5_hash_skb_data() should handle skb->frag_list, and eventually
recurse.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6596b4feeddc..49d0d2b8900c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2999,6 +2999,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 	const unsigned head_data_len = skb_headlen(skb) > header_len ?
 				       skb_headlen(skb) - header_len : 0;
 	const struct skb_shared_info *shi = skb_shinfo(skb);
+	struct sk_buff *frag_iter;
 
 	sg_init_table(&sg, 1);
 
@@ -3013,6 +3014,10 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 			return 1;
 	}
 
+	skb_walk_frags(skb, frag_iter)
+		if (tcp_md5_hash_skb_data(hp, frag_iter, 0))
+			return 1;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From 16dff91804777efbb0ce18b0a7e54c55e86b7beb Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 21 May 2010 22:18:34 +0000
Subject: net/can: Use memdup_user

Use memdup_user when user data is immediately copied into the
allocated region.

The semantic patch that makes this change is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression from,to,size,flag;
position p;
identifier l1,l2;
@@

-  to = \(kmalloc@p\|kzalloc@p\)(size,flag);
+  to = memdup_user(from,size);
   if (
-      to==NULL
+      IS_ERR(to)
                 || ...) {
   <+... when != goto l1;
-  -ENOMEM
+  PTR_ERR(to)
   ...+>
   }
-  if (copy_from_user(to, from, size) != 0) {
-    <+... when != goto l2;
-    -EFAULT
-    ...+>
-  }
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/can/raw.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/can/raw.c b/net/can/raw.c
index da99cf153b33..ccfe633eec8e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -436,14 +436,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
 		if (count > 1) {
 			/* filter does not fit into dfilter => alloc space */
-			filter = kmalloc(optlen, GFP_KERNEL);
-			if (!filter)
-				return -ENOMEM;
-
-			if (copy_from_user(filter, optval, optlen)) {
-				kfree(filter);
-				return -EFAULT;
-			}
+			filter = memdup_user(optval, optlen);
+			if (IS_ERR(filter))
+				return PTR_ERR(filter);
 		} else if (count == 1) {
 			if (copy_from_user(&sfilter, optval, sizeof(sfilter)))
 				return -EFAULT;
-- 
cgit v1.2.3-58-ga151


From 042604d2a3ee59c84f8293988caf35bac5de9eb3 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Fri, 21 May 2010 22:25:19 +0000
Subject: net/dccp: Use memdup_user

Use memdup_user when user data is immediately copied into the
allocated region.

The semantic patch that makes this change is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression from,to,size,flag;
position p;
identifier l1,l2;
@@

-  to = \(kmalloc@p\|kzalloc@p\)(size,flag);
+  to = memdup_user(from,size);
   if (
-      to==NULL
+      IS_ERR(to)
                 || ...) {
   <+... when != goto l1;
-  -ENOMEM
+  PTR_ERR(to)
   ...+>
   }
-  if (copy_from_user(to, from, size) != 0) {
-    <+... when != goto l2;
-    -EFAULT
-    ...+>
-  }
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/proto.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b03ecf6b2bb0..f79bcef5088f 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -473,14 +473,9 @@ static int dccp_setsockopt_ccid(struct sock *sk, int type,
 	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
 		return -EINVAL;
 
-	val = kmalloc(optlen, GFP_KERNEL);
-	if (val == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(val, optval, optlen)) {
-		kfree(val);
-		return -EFAULT;
-	}
+	val = memdup_user(optval, optlen);
+	if (IS_ERR(val))
+		return PTR_ERR(val);
 
 	lock_sock(sk);
 	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
-- 
cgit v1.2.3-58-ga151


From f5d72af9f3dfea67f014f880d00665f96622f57f Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 22 May 2010 10:43:42 +0000
Subject: caif: remove unneeded null check in caif_connect()

We already dereferenced uaddr towards the start of the function when we
checked that "uaddr->sa_family != AF_CAIF".  Both the check here and the
earlier check were added in bece7b2398d0: "caif: Rewritten socket
implementation".  Before that patch, we assumed that we recieved a valid
pointer for uaddr, and based on that, I have removed this check.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 3d0e09584fae..791249316ef3 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -874,8 +874,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	sk_stream_kill_queues(&cf_sk->sk);
 
 	err = -EINVAL;
-	if (addr_len != sizeof(struct sockaddr_caif) ||
-		!uaddr)
+	if (addr_len != sizeof(struct sockaddr_caif))
 		goto out;
 
 	memcpy(&cf_sk->conn_req.sockaddr, uaddr,
-- 
cgit v1.2.3-58-ga151


From 9bfca3c6e5965a01b7c6d8e2f2ea30799c9d43bc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 24 May 2010 06:29:34 +0000
Subject: caif: cleanup: remove duplicate checks

"phyinfo" can never be null here because we assigned it an address, so I
removed both the assert and the second check inside the if statement.  I
removed the "phyinfo->phy_layer != NULL" check as well because that was
asserted earlier.

Walter Harms suggested I move the "phyinfo->phy_ref_count++;" outside
the if condition for readability, so I have done that.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfcnfg.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index df43f264d9fb..7c81974a45c4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -308,19 +308,15 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	caif_assert(cnfg != NULL);
 	caif_assert(phyid != 0);
 	phyinfo = &cnfg->phy_layers[phyid];
-	caif_assert(phyinfo != NULL);
 	caif_assert(phyinfo->id == phyid);
 	caif_assert(phyinfo->phy_layer != NULL);
 	caif_assert(phyinfo->phy_layer->id == phyid);
 
-	if (phyinfo != NULL &&
-	    phyinfo->phy_ref_count++ == 0 &&
-	    phyinfo->phy_layer != NULL &&
+	phyinfo->phy_ref_count++;
+	if (phyinfo->phy_ref_count == 1 &&
 	    phyinfo->phy_layer->modemcmd != NULL) {
-		caif_assert(phyinfo->phy_layer->id == phyid);
 		phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
 					     _CAIF_MODEMCMD_PHYIF_USEFULL);
-
 	}
 	adapt_layer->id = channel_id;
 
-- 
cgit v1.2.3-58-ga151


From 06c4648d46d1b757d6b9591a86810be79818b60c Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Wed, 26 May 2010 00:09:42 +0000
Subject: arp_notify: allow drivers to explicitly request a notification event.

Currently such notifications are only generated when the device comes up or the
address changes. However one use case for these notifications is to enable
faster network recovery after a virtual machine migration (by causing switches
to relearn their MAC tables). A migration appears to the network stack as a
temporary loss of carrier and therefore does not trigger either of the current
conditions. Rather than adding carrier up as a trigger (which can cause issues
when interfaces a flapping) simply add an interface which the driver can use
to explicitly trigger the notification.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 include/linux/notifier.h  |  1 +
 net/ipv4/devinet.c        |  1 +
 net/sched/sch_generic.c   | 18 ++++++++++++++++++
 4 files changed, 22 insertions(+)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40291f375024..a24916156f4e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1772,6 +1772,8 @@ extern void netif_carrier_on(struct net_device *dev);
 
 extern void netif_carrier_off(struct net_device *dev);
 
+extern void netif_notify_peers(struct net_device *dev);
+
 /**
  *	netif_dormant_on - mark device as dormant.
  *	@dev: network device
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 540703b555cb..22c2abb61974 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -210,6 +210,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
 #define NETDEV_BONDING_DESLAVE  0x0012
+#define NETDEV_NOTIFY_PEERS	0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 382bc768ed56..da14c49284f4 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1081,6 +1081,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 		}
 		ip_mc_up(in_dev);
 		/* fall through */
+	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_CHANGEADDR:
 		/* Send gratuitous ARP to notify of link change */
 		if (IN_DEV_ARP_NOTIFY(in_dev)) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a63029ef3edd..bd1892fe4b21 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -327,6 +327,24 @@ void netif_carrier_off(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_carrier_off);
 
+/**
+ * 	netif_notify_peers - notify network peers about existence of @dev
+ * 	@dev: network device
+ *
+ * Generate traffic such that interested network peers are aware of
+ * @dev, such as by generating a gratuitous ARP. This may be used when
+ * a device wants to inform the rest of the network about some sort of
+ * reconfiguration such as a failover event or virtual machine
+ * migration.
+ */
+void netif_notify_peers(struct net_device *dev)
+{
+	rtnl_lock();
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(netif_notify_peers);
+
 /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
    under all circumstances. It is difficult to invent anything faster or
    cheaper.
-- 
cgit v1.2.3-58-ga151


From c936e8bd1de2fa50c49e3df6fa5036bf07870b67 Mon Sep 17 00:00:00 2001
From: Xiaotian Feng <dfeng@redhat.com>
Date: Mon, 31 May 2010 16:41:09 +0200
Subject: netfilter: don't xt_jumpstack_alloc twice in xt_register_table

In xt_register_table, xt_jumpstack_alloc is called first, later
xt_replace_table is used. But in xt_replace_table, xt_jumpstack_alloc
will be used again. Then the memory allocated by previous xt_jumpstack_alloc
will be leaked. We can simply remove the previous xt_jumpstack_alloc because
there aren't any users of newinfo between xt_jumpstack_alloc and
xt_replace_table.

Signed-off-by: Xiaotian Feng <dfeng@redhat.com>
Cc: Patrick McHardy <kaber@trash.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jan Engelhardt <jengelh@medozas.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Acked-By: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 445de702b8b7..47b1e7917a9c 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -844,10 +844,6 @@ struct xt_table *xt_register_table(struct net *net,
 	struct xt_table_info *private;
 	struct xt_table *t, *table;
 
-	ret = xt_jumpstack_alloc(newinfo);
-	if (ret < 0)
-		return ERR_PTR(ret);
-
 	/* Don't add one object to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
 	if (!table) {
-- 
cgit v1.2.3-58-ga151


From 7489aec8eed4f2f1eb3b4d35763bd3ea30b32ef5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 31 May 2010 16:41:35 +0200
Subject: netfilter: xtables: stackptr should be percpu

commit f3c5c1bfd4 (netfilter: xtables: make ip_tables reentrant)
introduced a performance regression, because stackptr array is shared by
all cpus, adding cache line ping pongs. (16 cpus share a 64 bytes cache
line)

Fix this using alloc_percpu()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-By: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  2 +-
 net/ipv4/netfilter/ip_tables.c     |  2 +-
 net/ipv6/netfilter/ip6_tables.c    |  2 +-
 net/netfilter/x_tables.c           | 13 +++----------
 4 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c00cc0c4d0b7..24e5d01d27d0 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -397,7 +397,7 @@ struct xt_table_info {
 	 * @stacksize jumps (number of user chains) can possibly be made.
 	 */
 	unsigned int stacksize;
-	unsigned int *stackptr;
+	unsigned int __percpu *stackptr;
 	void ***jumpstack;
 	/* ipt_entry tables: one per CPU */
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 63958f3394a5..4b6c5ca610fc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -336,7 +336,7 @@ ipt_do_table(struct sk_buff *skb,
 	cpu        = smp_processor_id();
 	table_base = private->entries[cpu];
 	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
-	stackptr   = &private->stackptr[cpu];
+	stackptr   = per_cpu_ptr(private->stackptr, cpu);
 	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6f517bd83692..9d2d68f0e605 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -363,7 +363,7 @@ ip6t_do_table(struct sk_buff *skb,
 	cpu        = smp_processor_id();
 	table_base = private->entries[cpu];
 	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
-	stackptr   = &private->stackptr[cpu];
+	stackptr   = per_cpu_ptr(private->stackptr, cpu);
 	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 47b1e7917a9c..e34622fa0003 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -699,10 +699,8 @@ void xt_free_table_info(struct xt_table_info *info)
 		vfree(info->jumpstack);
 	else
 		kfree(info->jumpstack);
-	if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
-		vfree(info->stackptr);
-	else
-		kfree(info->stackptr);
+
+	free_percpu(info->stackptr);
 
 	kfree(info);
 }
@@ -753,14 +751,9 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
 	unsigned int size;
 	int cpu;
 
-	size = sizeof(unsigned int) * nr_cpu_ids;
-	if (size > PAGE_SIZE)
-		i->stackptr = vmalloc(size);
-	else
-		i->stackptr = kmalloc(size, GFP_KERNEL);
+	i->stackptr = alloc_percpu(unsigned int);
 	if (i->stackptr == NULL)
 		return -ENOMEM;
-	memset(i->stackptr, 0, size);
 
 	size = sizeof(void **) * nr_cpu_ids;
 	if (size > PAGE_SIZE)
-- 
cgit v1.2.3-58-ga151


From b1faf5666438090a4dc4fceac8502edc7788b7e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 31 May 2010 23:44:05 -0700
Subject: net: sock_queue_err_skb() dont mess with sk_forward_alloc

Correct sk_forward_alloc handling for error_queue would need to use a
backlog of frames that softirq handler could not deliver because socket
is owned by user thread. Or extend backlog processing to be able to
process normal and error packets.

Another possibility is to not use mem charge for error queue, this is
what I implemented in this patch.

Note: this reverts commit 29030374
(net: fix sk_forward_alloc corruptions), since we dont need to lock
socket anymore.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 15 +--------------
 net/core/skbuff.c  | 30 ++++++++++++++++++++++++++++--
 net/ipv4/udp.c     |  6 ++----
 net/ipv6/udp.c     |  6 ++----
 4 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index ca241ea14875..731150d52799 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1524,20 +1524,7 @@ extern void sk_stop_timer(struct sock *sk, struct timer_list* timer);
 
 extern int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 
-static inline int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
-{
-	/* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
-	   number of warnings when compiling with -W --ANK
-	 */
-	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-	    (unsigned)sk->sk_rcvbuf)
-		return -ENOMEM;
-	skb_set_owner_r(skb, sk);
-	skb_queue_tail(&sk->sk_error_queue, skb);
-	if (!sock_flag(sk, SOCK_DEAD))
-		sk->sk_data_ready(sk, skb->len);
-	return 0;
-}
+extern int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb);
 
 /*
  *	Recover an error report and clear atomically
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4e7ac09c281a..9f07e749d7b1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2965,6 +2965,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
 }
 EXPORT_SYMBOL_GPL(skb_cow_data);
 
+static void sock_rmem_free(struct sk_buff *skb)
+{
+	struct sock *sk = skb->sk;
+
+	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+}
+
+/*
+ * Note: We dont mem charge error packets (no sk_forward_alloc changes)
+ */
+int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+	    (unsigned)sk->sk_rcvbuf)
+		return -ENOMEM;
+
+	skb_orphan(skb);
+	skb->sk = sk;
+	skb->destructor = sock_rmem_free;
+	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+
+	skb_queue_tail(&sk->sk_error_queue, skb);
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb->len);
+	return 0;
+}
+EXPORT_SYMBOL(sock_queue_err_skb);
+
 void skb_tstamp_tx(struct sk_buff *orig_skb,
 		struct skb_shared_hwtstamps *hwtstamps)
 {
@@ -2997,9 +3025,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 	serr->ee.ee_errno = ENOMSG;
 	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
 
-	bh_lock_sock(sk);
 	err = sock_queue_err_skb(sk, skb);
-	bh_unlock_sock(sk);
 
 	if (err)
 		kfree_skb(skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 50678f9a2763..eec4ff456e33 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -633,11 +633,9 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
 	if (!inet->recverr) {
 		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
 			goto out;
-	} else {
-		bh_lock_sock(sk);
+	} else
 		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));
-		bh_unlock_sock(sk);
-	}
+
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3048f906c042..87be58673b55 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -466,11 +466,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
 		goto out;
 
-	if (np->recverr) {
-		bh_lock_sock(sk);
+	if (np->recverr)
 		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
-		bh_unlock_sock(sk);
-	}
+
 	sk->sk_err = err;
 	sk->sk_error_report(sk);
 out:
-- 
cgit v1.2.3-58-ga151


From 288fcee8b7aa98796d96cd5b1b2e8005639328bf Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 31 May 2010 23:48:19 -0700
Subject: net/ipv4/tcp_input.c: fix compilation breakage when FASTRETRANS_DEBUG
 > 1

Commit: c720c7e8383aff1cb219bddf474ed89d850336e3 missed these.

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e6dafcb1071..548d575e6cc6 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2639,7 +2639,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 	if (sk->sk_family == AF_INET) {
 		printk(KERN_DEBUG "Undo %s %pI4/%u c%u l%u ss%u/%u p%u\n",
 		       msg,
-		       &inet->daddr, ntohs(inet->dport),
+		       &inet->inet_daddr, ntohs(inet->inet_dport),
 		       tp->snd_cwnd, tcp_left_out(tp),
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
@@ -2649,7 +2649,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 		struct ipv6_pinfo *np = inet6_sk(sk);
 		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
 		       msg,
-		       &np->daddr, ntohs(inet->dport),
+		       &np->daddr, ntohs(inet->inet_dport),
 		       tp->snd_cwnd, tcp_left_out(tp),
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
-- 
cgit v1.2.3-58-ga151


From 3ed37a6fa70a3a63dbb257fc640facb3974bba40 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 31 May 2010 17:23:21 +0000
Subject: net/ipv4/igmp.c: Remove unnecessary kmalloc casts

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 5fff865a4fa7..250cb5e1af48 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1646,8 +1646,7 @@ static int sf_setstate(struct ip_mc_list *pmc)
 				if (dpsf->sf_inaddr == psf->sf_inaddr)
 					break;
 			if (!dpsf) {
-				dpsf = (struct ip_sf_list *)
-					kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
 				if (!dpsf)
 					continue;
 				*dpsf = *psf;
-- 
cgit v1.2.3-58-ga151


From 5d55354f147bcf82b7a330bf9617b82a692b7d49 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 31 May 2010 17:23:22 +0000
Subject: net/ipv6/mcast.c: Remove unnecessary kmalloc casts

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 59f1881968c7..7b5fb43c227c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1995,8 +1995,7 @@ static int sf_setstate(struct ifmcaddr6 *pmc)
 				    &psf->sf_addr))
 					break;
 			if (!dpsf) {
-				dpsf = (struct ip6_sf_list *)
-					kmalloc(sizeof(*dpsf), GFP_ATOMIC);
+				dpsf = kmalloc(sizeof(*dpsf), GFP_ATOMIC);
 				if (!dpsf)
 					continue;
 				*dpsf = *psf;
-- 
cgit v1.2.3-58-ga151


From aac4dddc358acfd9d98b20024a42c34dfab31c39 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 1 Jun 2010 00:26:58 -0700
Subject: ipv6: get rid of ipip6_prl_lock

As noticed by Julia Lawall, ipip6_tunnel_add_prl() incorrectly calls
kzallloc(..., GFP_KERNEL) while a spinlock is held. She provided
a patch to use GFP_ATOMIC instead.

One possibility would be to convert this spinlock to a mutex, or
preallocate the thing before taking the lock.

After RCU conversion, it appears we dont need this lock, since
caller already holds RTNL

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index e51e650ea80b..702c532ec21e 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -249,8 +249,6 @@ failed:
 	return NULL;
 }
 
-static DEFINE_SPINLOCK(ipip6_prl_lock);
-
 #define for_each_prl_rcu(start)			\
 	for (prl = rcu_dereference(start);	\
 	     prl;				\
@@ -340,7 +338,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 	if (a->addr == htonl(INADDR_ANY))
 		return -EINVAL;
 
-	spin_lock(&ipip6_prl_lock);
+	ASSERT_RTNL();
 
 	for (p = t->prl; p; p = p->next) {
 		if (p->addr == a->addr) {
@@ -370,7 +368,6 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
 	t->prl_count++;
 	rcu_assign_pointer(t->prl, p);
 out:
-	spin_unlock(&ipip6_prl_lock);
 	return err;
 }
 
@@ -397,7 +394,7 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 	struct ip_tunnel_prl_entry *x, **p;
 	int err = 0;
 
-	spin_lock(&ipip6_prl_lock);
+	ASSERT_RTNL();
 
 	if (a && a->addr != htonl(INADDR_ANY)) {
 		for (p = &t->prl; *p; p = &(*p)->next) {
@@ -419,7 +416,6 @@ ipip6_tunnel_del_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a)
 		}
 	}
 out:
-	spin_unlock(&ipip6_prl_lock);
 	return err;
 }
 
-- 
cgit v1.2.3-58-ga151


From a8b563894d6fee9b90b7d6ed76f8ec28ad45bcbe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 1 Jun 2010 11:48:31 +0200
Subject: netfilter: br_netfilter: use skb_set_noref()

Avoid dirtying bridge_parent_rtable refcount, using new dst noref
infrastructure.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/br_netfilter.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 44420992f72f..cbea5af24ce6 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -244,8 +244,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 		kfree_skb(skb);
 		return 0;
 	}
-	dst_hold(&rt->u.dst);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set_noref(skb, &rt->u.dst);
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_update_protocol(skb);
@@ -396,8 +395,7 @@ bridged_dnat:
 			kfree_skb(skb);
 			return 0;
 		}
-		dst_hold(&rt->u.dst);
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set_noref(skb, &rt->u.dst);
 	}
 
 	skb->dev = nf_bridge->physindev;
-- 
cgit v1.2.3-58-ga151


From fabf3a85ab88063c10f367cccba7b3a1e59df996 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 1 Jun 2010 12:00:41 +0200
Subject: netfilter: xt_statistic: remove nth_lock spinlock

Use atomic_cmpxchg() to avoid dirtying a shared location.

xt_statistic_priv smp aligned to avoid sharing same cache line with
other stuff.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_statistic.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 96e62b8fd6b1..42ecb71d445f 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -18,8 +18,8 @@
 #include <linux/netfilter/x_tables.h>
 
 struct xt_statistic_priv {
-	uint32_t count;
-};
+	atomic_t count;
+} ____cacheline_aligned_in_smp;
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
@@ -27,13 +27,12 @@ MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
 MODULE_ALIAS("ipt_statistic");
 MODULE_ALIAS("ip6t_statistic");
 
-static DEFINE_SPINLOCK(nth_lock);
-
 static bool
 statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_statistic_info *info = par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
+	int nval, oval;
 
 	switch (info->mode) {
 	case XT_STATISTIC_MODE_RANDOM:
@@ -41,12 +40,12 @@ statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			ret = !ret;
 		break;
 	case XT_STATISTIC_MODE_NTH:
-		spin_lock_bh(&nth_lock);
-		if (info->master->count++ == info->u.nth.every) {
-			info->master->count = 0;
+		do {
+			oval = atomic_read(&info->master->count);
+			nval = (oval == info->u.nth.every) ? 0 : oval + 1;
+		} while (atomic_cmpxchg(&info->master->count, oval, nval) != oval);
+		if (nval == 0)
 			ret = !ret;
-		}
-		spin_unlock_bh(&nth_lock);
 		break;
 	}
 
@@ -64,7 +63,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par)
 	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
 	if (info->master == NULL)
 		return -ENOMEM;
-	info->master->count = info->u.nth.count;
+	atomic_set(&info->master->count, info->u.nth.count);
 
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From fafeeb6c80e3842c6dc19d05de09a23f23eef0d8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 1 Jun 2010 10:04:49 +0000
Subject: xfrm: force a dst reference in __xfrm_route_forward()

Packets going through __xfrm_route_forward() have a not refcounted dst
entry, since we enabled a noref forwarding path.

xfrm_lookup() might incorrectly release this dst entry.

It's a bit late to make invasive changes in xfrm_lookup(), so lets force
a refcount in this path.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d965a2bad8d3..4bf27d901333 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2153,6 +2153,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 		return 0;
 	}
 
+	skb_dst_force(skb);
 	dst = skb_dst(skb);
 
 	res = xfrm_lookup(net, &dst, &fl, NULL, 0) == 0;
-- 
cgit v1.2.3-58-ga151


From bc135b23d01acf7ee926aaf75b0020c86d3869f9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 03:23:51 -0700
Subject: net: Define accessors to manipulate QDISC_STATE_RUNNING

Define three helpers to manipulate QDISC_STATE_RUNNIG flag, that a
second patch will move on another location.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h   |  2 +-
 include/net/sch_generic.h | 15 +++++++++++++++
 net/core/dev.c            |  4 ++--
 net/sched/sch_generic.c   |  4 ++--
 4 files changed, 20 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9d4d87cc970e..d9549af6929a 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -95,7 +95,7 @@ extern void __qdisc_run(struct Qdisc *q);
 
 static inline void qdisc_run(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
+	if (qdisc_run_begin(q))
 		__qdisc_run(q);
 }
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 03ca5d826757..9707daed761e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -76,6 +76,21 @@ struct Qdisc {
 	struct rcu_head     rcu_head;
 };
 
+static inline bool qdisc_is_running(struct Qdisc *qdisc)
+{
+	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
+static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+{
+	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
+static inline void qdisc_run_end(struct Qdisc *qdisc)
+{
+	clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+}
+
 struct Qdisc_class_ops {
 	/* Child qdisc manipulation */
 	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 983a3c1d65c4..2733226d90b2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2047,7 +2047,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		kfree_skb(skb);
 		rc = NET_XMIT_DROP;
 	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
-		   !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
+		   qdisc_run_begin(q)) {
 		/*
 		 * This is a work-conserving queue; there are no old skbs
 		 * waiting to be sent out; and the qdisc is not running -
@@ -2059,7 +2059,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
 		else
-			clear_bit(__QDISC_STATE_RUNNING, &q->state);
+			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bd1892fe4b21..37b86eab6779 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -205,7 +205,7 @@ void __qdisc_run(struct Qdisc *q)
 		}
 	}
 
-	clear_bit(__QDISC_STATE_RUNNING, &q->state);
+	qdisc_run_end(q);
 }
 
 unsigned long dev_trans_start(struct net_device *dev)
@@ -797,7 +797,7 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 
 		spin_lock_bh(root_lock);
 
-		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
+		val = (qdisc_is_running(q) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
 		spin_unlock_bh(root_lock);
-- 
cgit v1.2.3-58-ga151


From 194dbcc8a1a97cbac9a619a563e5f6b7f7d5a485 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 12 May 2010 21:31:06 +0000
Subject: net: init_vlan should not copy slave or master flags

The vlan device should not copy the slave or master flags from
the real device. It is not in the bond until added nor is it
a master.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 55be90826f5f..529842677817 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -708,7 +708,8 @@ static int vlan_dev_init(struct net_device *dev)
 	netif_carrier_off(dev);
 
 	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
-	dev->flags  = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI);
+	dev->flags  = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
+					  IFF_MASTER | IFF_SLAVE);
 	dev->iflink = real_dev->ifindex;
 	dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
 					  (1<<__LINK_STATE_DORMANT))) |
-- 
cgit v1.2.3-58-ga151


From 2df4a0fa1540c460ec69788ab2a901cc72a75644 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 12 May 2010 21:31:11 +0000
Subject: net: fix conflict between null_or_orig and null_or_bond

If a skb is received on an inactive bond that does not meet
the special cases checked for by skb_bond_should_drop it should
only be delivered to exact matches as the comment in
netif_receive_skb() says.

However because null_or_bond could also be null this is not
always true.  This patch renames null_or_bond to orig_or_bond
and initializes it to orig_dev.  This keeps the intent of
null_or_bond to pass frames received on VLAN interfaces stacked
on bonding interfaces without invalidating the statement for
null_or_orig.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1845b08c624e..d03470f5260a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2795,7 +2795,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
-	struct net_device *null_or_bond;
+	struct net_device *orig_or_bond;
 	int ret = NET_RX_DROP;
 	__be16 type;
 
@@ -2868,10 +2868,10 @@ ncls:
 	 * device that may have registered for a specific ptype.  The
 	 * handler may have to adjust skb->dev and orig_dev.
 	 */
-	null_or_bond = NULL;
+	orig_or_bond = orig_dev;
 	if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
 	    (vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
-		null_or_bond = vlan_dev_real_dev(skb->dev);
+		orig_or_bond = vlan_dev_real_dev(skb->dev);
 	}
 
 	type = skb->protocol;
@@ -2879,7 +2879,7 @@ ncls:
 			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
 		if (ptype->type == type && (ptype->dev == null_or_orig ||
 		     ptype->dev == skb->dev || ptype->dev == orig_dev ||
-		     ptype->dev == null_or_bond)) {
+		     ptype->dev == orig_or_bond)) {
 			if (pt_prev)
 				ret = deliver_skb(skb, pt_prev, orig_dev);
 			pt_prev = ptype;
-- 
cgit v1.2.3-58-ga151


From 79640a4ca6955e3ebdb7038508fa7a0cd7fa5527 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 05:09:29 -0700
Subject: net: add additional lock to qdisc to increase throughput

When many cpus compete for sending frames on a given qdisc, the qdisc
spinlock suffers from very high contention.

The cpu owning __QDISC_STATE_RUNNING bit has same priority to acquire
the lock, and cannot dequeue packets fast enough, since it must wait for
this lock for each dequeued packet.

One solution to this problem is to force all cpus spinning on a second
lock before trying to get the main lock, when/if they see
__QDISC_STATE_RUNNING already set.

The owning cpu then compete with at most one other cpu for the main
lock, allowing for higher dequeueing rate.

Based on a previous patch from Alexander Duyck. I added the heuristic to
avoid the atomic in fast path, and put the new lock far away from the
cache line used by the dequeue worker. Also try to release the busylock
lock as late as possible.

Tests with following script gave a boost from ~50.000 pps to ~600.000
pps on a dual quad core machine (E5450 @3.00GHz), tg3 driver.
(A single netperf flow can reach ~800.000 pps on this platform)

for j in `seq 0 3`; do
  for i in `seq 0 7`; do
    netperf -H 192.168.0.1 -t UDP_STREAM -l 60 -N -T $i -- -m 6 &
  done
done

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  3 ++-
 net/core/dev.c            | 29 +++++++++++++++++++++++++----
 net/sched/sch_generic.c   |  1 +
 3 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b3591e4a514c..b35301b0c7b6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -80,7 +80,8 @@ struct Qdisc {
 	struct gnet_stats_basic_packed bstats;
 	unsigned long		__state;
 	struct gnet_stats_queue	qstats;
-	struct rcu_head     rcu_head;
+	struct rcu_head		rcu_head;
+	spinlock_t		busylock;
 };
 
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2733226d90b2..ffca5c1066fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2040,8 +2040,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 				 struct netdev_queue *txq)
 {
 	spinlock_t *root_lock = qdisc_lock(q);
+	bool contended = qdisc_is_running(q);
 	int rc;
 
+	/*
+	 * Heuristic to force contended enqueues to serialize on a
+	 * separate lock before trying to get qdisc main lock.
+	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
+	 * and dequeue packets faster.
+	 */
+	if (unlikely(contended))
+		spin_lock(&q->busylock);
+
 	spin_lock(root_lock);
 	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
 		kfree_skb(skb);
@@ -2056,19 +2066,30 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
 			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
-		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
+		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
 			__qdisc_run(q);
-		else
+		} else
 			qdisc_run_end(q);
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
 		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
-		qdisc_run(q);
+		if (qdisc_run_begin(q)) {
+			if (unlikely(contended)) {
+				spin_unlock(&q->busylock);
+				contended = false;
+			}
+			__qdisc_run(q);
+		}
 	}
 	spin_unlock(root_lock);
-
+	if (unlikely(contended))
+		spin_unlock(&q->busylock);
 	return rc;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 37b86eab6779..d20fcd2a5519 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -561,6 +561,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 
 	INIT_LIST_HEAD(&sch->list);
 	skb_queue_head_init(&sch->q);
+	spin_lock_init(&sch->busylock);
 	sch->ops = ops;
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
-- 
cgit v1.2.3-58-ga151


From c2d9ba9bce8d7323ca96f239e1f505c14d6244fb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 1 Jun 2010 06:51:19 +0000
Subject: net: CONFIG_NET_NS reduction

Use read_pnet() and write_pnet() to reduce number of ifdef CONFIG_NET_NS

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h            |  6 +-----
 include/net/cfg80211.h               | 15 ++-------------
 include/net/genetlink.h              | 15 ++-------------
 include/net/netfilter/nf_conntrack.h |  6 +-----
 include/net/sock.h                   | 10 ++--------
 net/ipv6/addrlabel.c                 |  6 +-----
 net/netfilter/nf_conntrack_core.c    |  8 ++------
 7 files changed, 11 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a24916156f4e..bd6b75317d5f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1087,11 +1087,7 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 static inline
 struct net *dev_net(const struct net_device *dev)
 {
-#ifdef CONFIG_NET_NS
-	return dev->nd_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&dev->nd_net);
 }
 
 static inline
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b44a2e5321a3..e7ebeb8bdf71 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1330,26 +1330,15 @@ struct wiphy {
 	char priv[0] __attribute__((__aligned__(NETDEV_ALIGN)));
 };
 
-#ifdef CONFIG_NET_NS
-static inline struct net *wiphy_net(struct wiphy *wiphy)
-{
-	return wiphy->_net;
-}
-
-static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net)
-{
-	wiphy->_net = net;
-}
-#else
 static inline struct net *wiphy_net(struct wiphy *wiphy)
 {
-	return &init_net;
+	return read_pnet(&wiphy->_net);
 }
 
 static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net)
 {
+	write_pnet(&wiphy->_net, net);
 }
-#endif
 
 /**
  * wiphy_priv - return priv from wiphy
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index eb551baafc04..f7dcd2c70412 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -68,26 +68,15 @@ struct genl_info {
 #endif
 };
 
-#ifdef CONFIG_NET_NS
 static inline struct net *genl_info_net(struct genl_info *info)
 {
-	return info->_net;
+	return read_pnet(&info->_net);
 }
 
 static inline void genl_info_net_set(struct genl_info *info, struct net *net)
 {
-	info->_net = net;
+	write_pnet(&info->_net, net);
 }
-#else
-static inline struct net *genl_info_net(struct genl_info *info)
-{
-	return &init_net;
-}
-
-static inline void genl_info_net_set(struct genl_info *info, struct net *net)
-{
-}
-#endif
 
 /**
  * struct genl_ops - generic netlink operations
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bde095f7e845..bbfdd9453087 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -152,11 +152,7 @@ extern struct net init_net;
 
 static inline struct net *nf_ct_net(const struct nf_conn *ct)
 {
-#ifdef CONFIG_NET_NS
-	return ct->ct_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&ct->ct_net);
 }
 
 /* Alter reply tuple (maybe alter helper). */
diff --git a/include/net/sock.h b/include/net/sock.h
index ca241ea14875..3461e5d1e9ad 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1724,19 +1724,13 @@ static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb, int copied_e
 static inline
 struct net *sock_net(const struct sock *sk)
 {
-#ifdef CONFIG_NET_NS
-	return sk->sk_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&sk->sk_net);
 }
 
 static inline
 void sock_net_set(struct sock *sk, struct net *net)
 {
-#ifdef CONFIG_NET_NS
-	sk->sk_net = net;
-#endif
+	write_pnet(&sk->sk_net, net);
 }
 
 /*
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8c4348cb1950..f0e774cea386 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -53,11 +53,7 @@ static struct ip6addrlbl_table
 static inline
 struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
 {
-#ifdef CONFIG_NET_NS
-	return lbl->lbl_net;
-#else
-	return &init_net;
-#endif
+	return read_pnet(&lbl->lbl_net);
 }
 
 /*
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc73982..77288980fae0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -619,9 +619,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
 	ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
 	/* Don't set timer yet: wait for confirmation */
 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
-#ifdef CONFIG_NET_NS
-	ct->ct_net = net;
-#endif
+	write_pnet(&ct->ct_net, net);
 #ifdef CONFIG_NF_CONNTRACK_ZONES
 	if (zone) {
 		struct nf_conntrack_zone *nf_ct_zone;
@@ -1363,9 +1361,7 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
+	write_pnet(&nf_conntrack_untracked.ct_net, &init_net);
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
 	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-- 
cgit v1.2.3-58-ga151


From 614f60fa9d73a9e8fdff3df83381907fea7c5649 Mon Sep 17 00:00:00 2001
From: Scott McMillan <scott.a.mcmillan@intel.com>
Date: Wed, 2 Jun 2010 05:53:56 -0700
Subject: packet_mmap: expose hw packet timestamps to network packet capture
 utilities

This patch adds a setting, PACKET_TIMESTAMP, to specify the packet
timestamp source that is exported to capture utilities like tcpdump by
packet_mmap.

PACKET_TIMESTAMP accepts the same integer bit field as
SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE and
SOF_TIMESTAMPING_RAW_HARDWARE values are currently recognized by
PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.

If PACKET_TIMESTAMP is not set, a software timestamp generated inside
the networking stack is used (the behavior before this setting was
added).

Signed-off-by: Scott McMillan <scott.a.mcmillan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 26 ++++++++++++++++++++++
 include/linux/if_packet.h                |  1 +
 net/packet/af_packet.c                   | 37 ++++++++++++++++++++++++++++++--
 3 files changed, 62 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 98f71a5cef00..2546aa4dc232 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -493,6 +493,32 @@ The user can also use poll() to check if a buffer is available:
     pfd.events = POLLOUT;
     retval = poll(&pfd, 1, timeout);
 
+-------------------------------------------------------------------------------
++ PACKET_TIMESTAMP
+-------------------------------------------------------------------------------
+
+The PACKET_TIMESTAMP setting determines the source of the timestamp in
+the packet meta information.  If your NIC is capable of timestamping
+packets in hardware, you can request those hardware timestamps to used.
+Note: you may need to enable the generation of hardware timestamps with
+SIOCSHWTSTAMP.
+
+PACKET_TIMESTAMP accepts the same integer bit field as
+SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE
+and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by
+PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
+SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.
+
+    int req = 0;
+    req |= SOF_TIMESTAMPING_SYS_HARDWARE;
+    setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
+
+If PACKET_TIMESTAMP is not set, a software timestamp generated inside
+the networking stack is used (the behavior before this setting was added).
+
+See include/linux/net_tstamp.h and Documentation/networking/timestamping
+for more information on hardware timestamps.
+
 --------------------------------------------------------------------------------
 + THANKS
 --------------------------------------------------------------------------------
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 6ac23ef1801a..72bfa5a034dd 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -48,6 +48,7 @@ struct sockaddr_ll {
 #define PACKET_LOSS			14
 #define PACKET_VNET_HDR			15
 #define PACKET_TX_TIMESTAMP		16
+#define PACKET_TIMESTAMP		17
 
 struct tpacket_stats {
 	unsigned int	tp_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2078a277e06b..9a17f28b1253 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -83,6 +83,7 @@
 #include <linux/if_vlan.h>
 #include <linux/virtio_net.h>
 #include <linux/errqueue.h>
+#include <linux/net_tstamp.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -202,6 +203,7 @@ struct packet_sock {
 	unsigned int		tp_hdrlen;
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
+	unsigned int		tp_tstamp;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
@@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct sk_buff *copy_skb = NULL;
 	struct timeval tv;
 	struct timespec ts;
+	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h1->tp_snaplen = snaplen;
 		h.h1->tp_mac = macoff;
 		h.h1->tp_net = netoff;
-		if (skb->tstamp.tv64)
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			tv = ktime_to_timeval(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
 			tv = ktime_to_timeval(skb->tstamp);
 		else
 			do_gettimeofday(&tv);
@@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		h.h2->tp_snaplen = snaplen;
 		h.h2->tp_mac = macoff;
 		h.h2->tp_net = netoff;
-		if (skb->tstamp.tv64)
+		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+				&& shhwtstamps->syststamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->syststamp);
+		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+				&& shhwtstamps->hwtstamp.tv64)
+			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
+		else if (skb->tstamp.tv64)
 			ts = ktime_to_timespec(skb->tstamp);
 		else
 			getnstimeofday(&ts);
@@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		po->has_vnet_hdr = !!val;
 		return 0;
 	}
+	case PACKET_TIMESTAMP:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->tp_tstamp = val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		val = po->tp_loss;
 		data = &val;
 		break;
+	case PACKET_TIMESTAMP:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->tp_tstamp;
+		data = &val;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3-58-ga151


From 33c29dde7d04dc0ec0edb649d20ccf1351c13a06 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 29 May 2010 14:26:59 +0000
Subject: act_nat: fix the wrong checksum when addr isn't in old_addr/mask

fix the wrong checksum when addr isn't in old_addr/mask

For TCP and UDP packets, when addr isn't in old_addr/mask we don't do SNAT or
DNAT, and we should not update layer 4 checksum.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/sched/act_nat.c |    4 ++++
 1 file changed, 4 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_nat.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index d885ba311564..570949417f38 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -159,6 +159,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 			iph->daddr = new_addr;
 
 		csum_replace4(&iph->check, addr, new_addr);
+	} else if ((iph->frag_off & htons(IP_OFFSET)) ||
+		   iph->protocol != IPPROTO_ICMP) {
+		goto out;
 	}
 
 	ihl = iph->ihl * 4;
@@ -247,6 +250,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 		break;
 	}
 
+out:
 	return action;
 
 drop:
-- 
cgit v1.2.3-58-ga151


From 130c0f47fdf9c533deb1136cad5136f6cb7020f0 Mon Sep 17 00:00:00 2001
From: Wu Fengguang <fengguang.wu@intel.com>
Date: Sun, 30 May 2010 17:19:53 +0000
Subject: ipconfig: send host-name in DHCP requests

Normally dhclient can be configured to send the "host-name" option
in DHCP requests to update the client's DNS record. However for an
NFSROOT system, dhclient shall never be called (which may change the
IP addr and therefore lose your root NFS mount connection).

So enable updating the DNS record with kernel parameter

	ip=::::$HOST_NAME::dhcp

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipconfig.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index b9d84e800cf4..3a6e1ec5e9ae 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -665,6 +665,13 @@ ic_dhcp_init_options(u8 *options)
 		memcpy(e, ic_req_params, sizeof(ic_req_params));
 		e += sizeof(ic_req_params);
 
+		if (ic_host_name_set) {
+			*e++ = 12;	/* host-name */
+			len = strlen(utsname()->nodename);
+			*e++ = len;
+			memcpy(e, utsname()->nodename, len);
+			e += len;
+		}
 		if (*vendor_class_identifier) {
 			printk(KERN_INFO "DHCP: sending class identifier \"%s\"\n",
 			       vendor_class_identifier);
-- 
cgit v1.2.3-58-ga151


From 20c59de2e6b6bc74bbf714dcd4e720afe8d516cf Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard <arno@natisbad.org>
Date: Tue, 1 Jun 2010 21:35:01 +0000
Subject: ipv6: Refactor update of IPv6 flowi destination address for srcrt
 (RH) option

There are more than a dozen occurrences of following code in the
IPv6 stack:

    if (opt && opt->srcrt) {
            struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
            ipv6_addr_copy(&final, &fl.fl6_dst);
            ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
            final_p = &final;
    }

Replace those with a helper. Note that the helper overrides final_p
in all cases. This is ok as final_p was previously initialized to
NULL when declared.

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h               |  4 ++++
 net/dccp/ipv6.c                  | 30 ++++++------------------------
 net/ipv6/af_inet6.c              |  9 ++-------
 net/ipv6/datagram.c              | 18 ++++--------------
 net/ipv6/exthdrs.c               | 24 ++++++++++++++++++++++++
 net/ipv6/inet6_connection_sock.c |  9 ++-------
 net/ipv6/raw.c                   | 10 ++--------
 net/ipv6/syncookies.c            |  9 ++-------
 net/ipv6/tcp_ipv6.c              | 27 ++++++---------------------
 net/ipv6/udp.c                   | 11 +++--------
 10 files changed, 55 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 2600b69757b8..f5808d596aab 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -551,6 +551,10 @@ extern int 			ipv6_ext_hdr(u8 nexthdr);
 
 extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type);
 
+extern struct in6_addr *fl6_update_dst(struct flowi *fl,
+				       const struct ipv6_txoptions *opt,
+				       struct in6_addr *orig);
+
 /*
  *	socket options (ipv6_sockglue.c)
  */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 091698899594..6e3f32575df7 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -248,7 +248,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff *skb;
 	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr *final_p = NULL, final;
+	struct in6_addr *final_p, final;
 	struct flowi fl;
 	int err = -1;
 	struct dst_entry *dst;
@@ -265,13 +265,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	opt = np->opt;
 
-	if (opt != NULL && opt->srcrt != NULL) {
-		const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
-
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
@@ -545,19 +539,13 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		goto out_overflow;
 
 	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_DCCP;
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		if (opt != NULL && opt->srcrt != NULL) {
-			const struct rt0_hdr *rt0 = (struct rt0_hdr *)opt->srcrt;
-
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
@@ -885,7 +873,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct in6_addr *saddr = NULL, *final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -988,13 +976,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.fl_ip_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, &fl);
 
-	if (np->opt != NULL && np->opt->srcrt != NULL) {
-		const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e733942dafe1..94b1b9c954bf 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -651,7 +651,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 	if (dst == NULL) {
 		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
@@ -665,12 +665,7 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl.fl_ip_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, &fl);
 
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, np->opt, &final);
 
 		err = ip6_dst_lookup(sk, &dst, &fl);
 		if (err) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 712684687c9a..7d929a22cbc2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -38,10 +38,11 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
 	struct inet_sock      	*inet = inet_sk(sk);
 	struct ipv6_pinfo      	*np = inet6_sk(sk);
-	struct in6_addr		*daddr, *final_p = NULL, final;
+	struct in6_addr		*daddr, *final_p, final;
 	struct dst_entry	*dst;
 	struct flowi		fl;
 	struct ip6_flowlabel	*flowlabel = NULL;
+	struct ipv6_txoptions   *opt;
 	int			addr_type;
 	int			err;
 
@@ -155,19 +156,8 @@ ipv4_connected:
 
 	security_sk_classify_flow(sk, &fl);
 
-	if (flowlabel) {
-		if (flowlabel->opt && flowlabel->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-	} else if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	opt = flowlabel ? flowlabel->opt : np->opt;
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 8a659f92d17a..853a633a94d4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -874,3 +874,27 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
 	return opt;
 }
 
+/**
+ * fl6_update_dst - update flowi destination address with info given
+ *                  by srcrt option, if any.
+ *
+ * @fl: flowi for which fl6_dst is to be updated
+ * @opt: struct ipv6_txoptions in which to look for srcrt opt
+ * @orig: copy of original fl6_dst address if modified
+ *
+ * Returns NULL if no txoptions or no srcrt, otherwise returns orig
+ * and initial value of fl->fl6_dst set in orig
+ */
+struct in6_addr *fl6_update_dst(struct flowi *fl,
+				const struct ipv6_txoptions *opt,
+				struct in6_addr *orig)
+{
+	if (!opt || !opt->srcrt)
+		return NULL;
+
+	ipv6_addr_copy(orig, &fl->fl6_dst);
+	ipv6_addr_copy(&fl->fl6_dst, ((struct rt0_hdr *)opt->srcrt)->addr);
+	return orig;
+}
+
+EXPORT_SYMBOL_GPL(fl6_update_dst);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 0c5e3c3b7fd5..8a1628023bd1 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -185,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct flowi fl;
 	struct dst_entry *dst;
-	struct in6_addr *final_p = NULL, final;
+	struct in6_addr *final_p, final;
 
 	memset(&fl, 0, sizeof(fl));
 	fl.proto = sk->sk_protocol;
@@ -199,12 +199,7 @@ int inet6_csk_xmit(struct sk_buff *skb)
 	fl.fl_ip_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, &fl);
 
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4a4dcbe4f8b2..864eb8e03b1b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -725,7 +725,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 {
 	struct ipv6_txoptions opt_space;
 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct raw6_sock *rp = raw6_sk(sk);
@@ -847,13 +847,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
 		fl.oif = np->mcast_oif;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 34d1f0690d7e..12383705dbad 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -240,17 +240,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	 * me if there is a preferred way.
 	 */
 	{
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, np->opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2b7c3a100e2c..e487080d02db 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -129,7 +129,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct in6_addr *saddr = NULL, *final_p = NULL, final;
+	struct in6_addr *saddr = NULL, *final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int addr_type;
@@ -250,12 +250,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl.fl_ip_dport = usin->sin6_port;
 	fl.fl_ip_sport = inet->inet_sport;
 
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, np->opt, &final);
 
 	security_sk_classify_flow(sk, &fl);
 
@@ -477,7 +472,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
-	struct in6_addr * final_p = NULL, final;
+	struct in6_addr * final_p, final;
 	struct flowi fl;
 	struct dst_entry *dst;
 	int err = -1;
@@ -494,12 +489,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	security_req_classify_flow(req, &fl);
 
 	opt = np->opt;
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
+	final_p = fl6_update_dst(&fl, opt, &final);
 
 	err = ip6_dst_lookup(sk, &dst, &fl);
 	if (err)
@@ -1392,18 +1382,13 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		goto out_overflow;
 
 	if (dst == NULL) {
-		struct in6_addr *final_p = NULL, final;
+		struct in6_addr *final_p, final;
 		struct flowi fl;
 
 		memset(&fl, 0, sizeof(fl));
 		fl.proto = IPPROTO_TCP;
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		if (opt && opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
+		final_p = fl6_update_dst(&fl, opt, &final);
 		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
 		fl.oif = sk->sk_bound_dev_if;
 		fl.mark = sk->sk_mark;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3048f906c042..4aea57dec75f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -929,7 +929,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
-	struct in6_addr *daddr, *final_p = NULL, final;
+	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct flowi fl;
@@ -1099,14 +1099,9 @@ do_udp_sendmsg:
 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
 	fl.fl_ip_sport = inet->inet_sport;
 
-	/* merge ip6_build_xmit from ip6_output */
-	if (opt && opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
+	final_p = fl6_update_dst(&fl, opt, &final);
+	if (final_p)
 		connected = 0;
-	}
 
 	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
 		fl.oif = np->mcast_oif;
-- 
cgit v1.2.3-58-ga151


From ab95bfe01f9872459c8678572ccadbf646badad0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 1 Jun 2010 21:52:08 +0000
Subject: net: replace hooks in __netif_receive_skb V5

What this patch does is it removes two receive frame hooks (for bridge and for
macvlan) from __netif_receive_skb. These are replaced them with a single
hook for both. It only supports one hook per device because it makes no
sense to do bridging and macvlan on the same device.

Then a network driver (of virtual netdev like macvlan or bridge) can register
an rx_handler for needed net device.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      |  19 +++++---
 include/linux/if_bridge.h  |   2 -
 include/linux/if_macvlan.h |   4 --
 include/linux/netdevice.h  |   7 +++
 net/bridge/br.c            |   2 -
 net/bridge/br_if.c         |   8 +++
 net/bridge/br_input.c      |  12 +++--
 net/bridge/br_private.h    |   3 +-
 net/core/dev.c             | 119 +++++++++++++++++++++------------------------
 9 files changed, 93 insertions(+), 83 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 87e8d4cb4057..53422ce26f7f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,15 +145,16 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
-					    struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
 {
+	struct macvlan_port *port;
 	const struct ethhdr *eth = eth_hdr(skb);
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
+	port = rcu_dereference(skb->dev->macvlan_port);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
@@ -515,6 +516,7 @@ static int macvlan_port_create(struct net_device *dev)
 {
 	struct macvlan_port *port;
 	unsigned int i;
+	int err;
 
 	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK)
 		return -EINVAL;
@@ -528,13 +530,21 @@ static int macvlan_port_create(struct net_device *dev)
 	for (i = 0; i < MACVLAN_HASH_SIZE; i++)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
-	return 0;
+
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
+	if (err) {
+		rcu_assign_pointer(dev->macvlan_port, NULL);
+		kfree(port);
+	}
+
+	return err;
 }
 
 static void macvlan_port_destroy(struct net_device *dev)
 {
 	struct macvlan_port *port = dev->macvlan_port;
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->macvlan_port, NULL);
 	synchronize_rcu();
 	kfree(port);
@@ -767,14 +777,12 @@ static int __init macvlan_init_module(void)
 	int err;
 
 	register_netdevice_notifier(&macvlan_notifier_block);
-	macvlan_handle_frame_hook = macvlan_handle_frame;
 
 	err = macvlan_link_register(&macvlan_link_ops);
 	if (err < 0)
 		goto err1;
 	return 0;
 err1:
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 	return err;
 }
@@ -782,7 +790,6 @@ err1:
 static void __exit macvlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&macvlan_link_ops);
-	macvlan_handle_frame_hook = NULL;
 	unregister_netdevice_notifier(&macvlan_notifier_block);
 }
 
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 938b7e81df95..0d241a5c4909 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -102,8 +102,6 @@ struct __fdb_entry {
 #include <linux/netdevice.h>
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
-extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					       struct sk_buff *skb);
 extern int (*br_should_route_hook)(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 9ea047aca795..c26a0e4f0ce8 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -84,8 +84,4 @@ extern int macvlan_link_register(struct rtnl_link_ops *ops);
 extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
-
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
-						    struct sk_buff *);
-
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bd6b75317d5f..5156b806924c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -381,6 +381,8 @@ enum gro_result {
 };
 typedef enum gro_result gro_result_t;
 
+typedef struct sk_buff *rx_handler_func_t(struct sk_buff *skb);
+
 extern void __napi_schedule(struct napi_struct *n);
 
 static inline int napi_disable_pending(struct napi_struct *n)
@@ -957,6 +959,7 @@ struct net_device {
 #endif
 
 	struct netdev_queue	rx_queue;
+	rx_handler_func_t	*rx_handler;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
@@ -1689,6 +1692,10 @@ static inline void napi_free_frags(struct napi_struct *napi)
 	napi->skb = NULL;
 }
 
+extern int netdev_rx_handler_register(struct net_device *dev,
+				      rx_handler_func_t *rx_handler);
+extern void netdev_rx_handler_unregister(struct net_device *dev);
+
 extern void		netif_nit_deliver(struct sk_buff *skb);
 extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 76357b547752..c8436fa31344 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -63,7 +63,6 @@ static int __init br_init(void)
 		goto err_out4;
 
 	brioctl_set(br_ioctl_deviceless_stub);
-	br_handle_frame_hook = br_handle_frame;
 
 #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
 	br_fdb_test_addr_hook = br_fdb_test_addr;
@@ -100,7 +99,6 @@ static void __exit br_deinit(void)
 	br_fdb_test_addr_hook = NULL;
 #endif
 
-	br_handle_frame_hook = NULL;
 	br_fdb_fini();
 }
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 18b245e2c00e..d9242342837e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -147,6 +147,7 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	netdev_rx_handler_unregister(dev);
 	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
@@ -429,6 +430,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		goto err2;
 
 	rcu_assign_pointer(dev->br_port, p);
+
+	err = netdev_rx_handler_register(dev, br_handle_frame);
+	if (err)
+		goto err3;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -451,6 +457,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	br_netpoll_enable(br, dev);
 
 	return 0;
+err3:
+	rcu_assign_pointer(dev->br_port, NULL);
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d36e700f7a26..99647d8f95c8 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -131,15 +131,19 @@ static inline int is_link_local(const unsigned char *dest)
 }
 
 /*
- * Called via br_handle_frame_hook.
  * Return NULL if skb is handled
- * note: already called with rcu_read_lock (preempt_disabled)
+ * note: already called with rcu_read_lock (preempt_disabled) from
+ * netif_receive_skb
  */
-struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
+struct sk_buff *br_handle_frame(struct sk_buff *skb)
 {
+	struct net_bridge_port *p;
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	int (*rhook)(struct sk_buff *skb);
 
+	if (skb->pkt_type == PACKET_LOOPBACK)
+		return skb;
+
 	if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
 		goto drop;
 
@@ -147,6 +151,8 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
+	p = rcu_dereference(skb->dev->br_port);
+
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
 		if (skb->protocol == htons(ETH_P_PAUSE))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f4a74bc6a9b..c83519b555bb 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -331,8 +331,7 @@ extern void br_features_recompute(struct net_bridge *br);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
-extern struct sk_buff *br_handle_frame(struct net_bridge_port *p,
-				       struct sk_buff *skb);
+extern struct sk_buff *br_handle_frame(struct sk_buff *skb);
 
 /* br_ioctl.c */
 extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
diff --git a/net/core/dev.c b/net/core/dev.c
index ffca5c1066fa..ec01a5998d70 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2604,70 +2604,14 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
-#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
-
-#if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)
+#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
+    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-/*
- * If bridge module is loaded call bridging hook.
- *  returns NULL if packet was consumed.
- */
-struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
-					struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(br_handle_frame_hook);
-
-static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
-					    struct packet_type **pt_prev, int *ret,
-					    struct net_device *orig_dev)
-{
-	struct net_bridge_port *port;
-
-	if (skb->pkt_type == PACKET_LOOPBACK ||
-	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-
-	return br_handle_frame_hook(port, skb);
-}
-#else
-#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
-#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
-					     struct sk_buff *skb) __read_mostly;
-EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
-
-static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
-					     struct packet_type **pt_prev,
-					     int *ret,
-					     struct net_device *orig_dev)
-{
-	struct macvlan_port *port;
-
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (!port)
-		return skb;
-
-	if (*pt_prev) {
-		*ret = deliver_skb(skb, *pt_prev, orig_dev);
-		*pt_prev = NULL;
-	}
-	return macvlan_handle_frame_hook(port, skb);
-}
-#else
-#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-#endif
-
 #ifdef CONFIG_NET_CLS_ACT
 /* TODO: Maybe we should just force sch_ingress to be compiled in
  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
@@ -2763,6 +2707,47 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+/**
+ *	netdev_rx_handler_register - register receive handler
+ *	@dev: device to register a handler for
+ *	@rx_handler: receive handler to register
+ *
+ *	Register a receive hander for a device. This handler will then be
+ *	called from __netif_receive_skb. A negative errno code is returned
+ *	on a failure.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int netdev_rx_handler_register(struct net_device *dev,
+			       rx_handler_func_t *rx_handler)
+{
+	ASSERT_RTNL();
+
+	if (dev->rx_handler)
+		return -EBUSY;
+
+	rcu_assign_pointer(dev->rx_handler, rx_handler);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
+
+/**
+ *	netdev_rx_handler_unregister - unregister receive handler
+ *	@dev: device to unregister a handler from
+ *
+ *	Unregister a receive hander from a device.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void netdev_rx_handler_unregister(struct net_device *dev)
+{
+
+	ASSERT_RTNL();
+	rcu_assign_pointer(dev->rx_handler, NULL);
+}
+EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
+
 static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
 					      struct net_device *master)
 {
@@ -2815,6 +2800,7 @@ EXPORT_SYMBOL(__skb_bond_should_drop);
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
+	rx_handler_func_t *rx_handler;
 	struct net_device *orig_dev;
 	struct net_device *master;
 	struct net_device *null_or_orig;
@@ -2877,12 +2863,17 @@ static int __netif_receive_skb(struct sk_buff *skb)
 ncls:
 #endif
 
-	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
-	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
-	if (!skb)
-		goto out;
+	/* Handle special case of bridge or macvlan */
+	rx_handler = rcu_dereference(skb->dev->rx_handler);
+	if (rx_handler) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		skb = rx_handler(skb);
+		if (!skb)
+			goto out;
+	}
 
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
-- 
cgit v1.2.3-58-ga151


From edafe502404f3669d364b6e96d79b54067b634b4 Mon Sep 17 00:00:00 2001
From: Daniele Lacamera <root@danielinux.net>
Date: Wed, 2 Jun 2010 02:02:04 +0000
Subject: TCP: tcp_hybla: Fix integer overflow in slow start increment

For large values of rtt, 2^rho operation may overflow u32. Clamp down the increment to 2^16.

Signed-off-by: Daniele Lacamera <root@danielinux.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_hybla.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index c209e054a634..377bc9349371 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -126,8 +126,8 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
 		 * calculate 2^fract in a <<7 value.
 		 */
 		is_slowstart = 1;
-		increment = ((1 << ca->rho) * hybla_fraction(rho_fractions))
-			- 128;
+		increment = ((1 << min(ca->rho, 16U)) *
+			hybla_fraction(rho_fractions)) - 128;
 	} else {
 		/*
 		 * congestion avoidance
-- 
cgit v1.2.3-58-ga151


From fbc2e7d9cf49e0bf89b9e91fd60a06851a855c5d Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 2 Jun 2010 07:32:42 -0700
Subject: cls_u32: use skb_header_pointer() to dereference data safely

use skb_header_pointer() to dereference data safely

the original skb->data dereference isn't safe, as there isn't any skb->len or
skb_is_nonlinear() check. skb_header_pointer() is used instead in this patch.
And when the skb isn't long enough, we terminate the function u32_classify()
immediately with -1.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_u32.c | 49 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 96275422c619..4f522143811e 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -98,11 +98,11 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
 {
 	struct {
 		struct tc_u_knode *knode;
-		u8		  *ptr;
+		unsigned int	  off;
 	} stack[TC_U32_MAXDEPTH];
 
 	struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
-	u8 *ptr = skb_network_header(skb);
+	unsigned int off = skb_network_offset(skb);
 	struct tc_u_knode *n;
 	int sdepth = 0;
 	int off2 = 0;
@@ -134,8 +134,14 @@ next_knode:
 #endif
 
 		for (i = n->sel.nkeys; i>0; i--, key++) {
-
-			if ((*(__be32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
+			unsigned int toff;
+			__be32 *data, _data;
+
+			toff = off + key->off + (off2 & key->offmask);
+			data = skb_header_pointer(skb, toff, 4, &_data);
+			if (!data)
+				goto out;
+			if ((*data ^ key->val) & key->mask) {
 				n = n->next;
 				goto next_knode;
 			}
@@ -174,29 +180,45 @@ check_terminal:
 		if (sdepth >= TC_U32_MAXDEPTH)
 			goto deadloop;
 		stack[sdepth].knode = n;
-		stack[sdepth].ptr = ptr;
+		stack[sdepth].off = off;
 		sdepth++;
 
 		ht = n->ht_down;
 		sel = 0;
-		if (ht->divisor)
-			sel = ht->divisor&u32_hash_fold(*(__be32*)(ptr+n->sel.hoff), &n->sel,n->fshift);
-
+		if (ht->divisor) {
+			__be32 *data, _data;
+
+			data = skb_header_pointer(skb, off + n->sel.hoff, 4,
+						  &_data);
+			if (!data)
+				goto out;
+			sel = ht->divisor & u32_hash_fold(*data, &n->sel,
+							  n->fshift);
+		}
 		if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
 			goto next_ht;
 
 		if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
 			off2 = n->sel.off + 3;
-			if (n->sel.flags&TC_U32_VAROFFSET)
-				off2 += ntohs(n->sel.offmask & *(__be16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
+			if (n->sel.flags & TC_U32_VAROFFSET) {
+				__be16 *data, _data;
+
+				data = skb_header_pointer(skb,
+							  off + n->sel.offoff,
+							  2, &_data);
+				if (!data)
+					goto out;
+				off2 += ntohs(n->sel.offmask & *data) >>
+					n->sel.offshift;
+			}
 			off2 &= ~3;
 		}
 		if (n->sel.flags&TC_U32_EAT) {
-			ptr += off2;
+			off += off2;
 			off2 = 0;
 		}
 
-		if (ptr < skb_tail_pointer(skb))
+		if (off < skb->len)
 			goto next_ht;
 	}
 
@@ -204,9 +226,10 @@ check_terminal:
 	if (sdepth--) {
 		n = stack[sdepth].knode;
 		ht = n->ht_up;
-		ptr = stack[sdepth].ptr;
+		off = stack[sdepth].off;
 		goto check_terminal;
 	}
+out:
 	return -1;
 
 deadloop:
-- 
cgit v1.2.3-58-ga151


From b5f7e7554753e2cc3ef3bef0271fdb32027df2ba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 12:05:27 +0000
Subject: ipv4: add LINUX_MIB_IPRPFILTER snmp counter

Christoph Lameter mentioned that packets could be dropped in input path
because of rp_filter settings, without any SNMP counter being
incremented. System administrator can have a hard time to track the
problem.

This patch introduces a new counter, LINUX_MIB_IPRPFILTER, incremented
each time we drop a packet because Reverse Path Filter triggers.

(We receive an IPv4 datagram on a given interface, and find the route to
send an answer would use another interface)

netstat -s | grep IPReversePathFilter
    IPReversePathFilter: 21714

Reported-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h    |  1 +
 net/ipv4/fib_frontend.c |  6 ++++--
 net/ipv4/ip_input.c     |  3 +++
 net/ipv4/proc.c         |  1 +
 net/ipv4/route.c        | 31 ++++++++++++++++++-------------
 5 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 52797714ade7..ebb0c80ffd6e 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -229,6 +229,7 @@ enum
 	LINUX_MIB_TCPBACKLOGDROP,
 	LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
 	LINUX_MIB_TCPDEFERACCEPTDROP,
+	LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4f0ed458c883..e830f7a123bd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -284,7 +284,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 	if (no_addr)
 		goto last_resort;
 	if (rpf == 1)
-		goto e_inval;
+		goto e_rpf;
 	fl.oif = dev->ifindex;
 
 	ret = 0;
@@ -299,7 +299,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
 
 last_resort:
 	if (rpf)
-		goto e_inval;
+		goto e_rpf;
 	*spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
 	*itag = 0;
 	return 0;
@@ -308,6 +308,8 @@ e_inval_res:
 	fib_res_put(&res);
 e_inval:
 	return -EINVAL;
+e_rpf:
+	return -EXDEV;
 }
 
 static inline __be32 sk_extract_addr(struct sockaddr *addr)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d930dc5e4d85..d52c9da644cf 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -340,6 +340,9 @@ static int ip_rcv_finish(struct sk_buff *skb)
 			else if (err == -ENETUNREACH)
 				IP_INC_STATS_BH(dev_net(skb->dev),
 						IPSTATS_MIB_INNOROUTES);
+			else if (err == -EXDEV)
+				NET_INC_STATS_BH(dev_net(skb->dev),
+						 LINUX_MIB_IPRPFILTER);
 			goto drop;
 		}
 	}
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3dc9914c1dce..e320ca6b3ef3 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -252,6 +252,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
 	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
 	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
+	SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8495bceec764..d377b45005fc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1851,6 +1851,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	__be32 spec_dst;
 	struct in_device *in_dev = in_dev_get(dev);
 	u32 itag = 0;
+	int err;
 
 	/* Primary sanity checks. */
 
@@ -1865,10 +1866,12 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		if (!ipv4_is_local_multicast(daddr))
 			goto e_inval;
 		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-	} else if (fib_validate_source(saddr, 0, tos, 0,
-					dev, &spec_dst, &itag, 0) < 0)
-		goto e_inval;
-
+	} else {
+		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
+					  &itag, 0);
+		if (err < 0)
+			goto e_err;
+	}
 	rth = dst_alloc(&ipv4_dst_ops);
 	if (!rth)
 		goto e_nobufs;
@@ -1920,8 +1923,10 @@ e_nobufs:
 	return -ENOBUFS;
 
 e_inval:
+	err = -EINVAL;
+e_err:
 	in_dev_put(in_dev);
-	return -EINVAL;
+	return err;
 }
 
 
@@ -1985,7 +1990,6 @@ static int __mkroute_input(struct sk_buff *skb,
 		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
 					 saddr);
 
-		err = -EINVAL;
 		goto cleanup;
 	}
 
@@ -2157,13 +2161,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto brd_input;
 
 	if (res.type == RTN_LOCAL) {
-		int result;
-		result = fib_validate_source(saddr, daddr, tos,
+		err = fib_validate_source(saddr, daddr, tos,
 					     net->loopback_dev->ifindex,
 					     dev, &spec_dst, &itag, skb->mark);
-		if (result < 0)
-			goto martian_source;
-		if (result)
+		if (err < 0)
+			goto martian_source_keep_err;
+		if (err)
 			flags |= RTCF_DIRECTSRC;
 		spec_dst = daddr;
 		goto local_input;
@@ -2191,7 +2194,7 @@ brd_input:
 		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
 					  &itag, skb->mark);
 		if (err < 0)
-			goto martian_source;
+			goto martian_source_keep_err;
 		if (err)
 			flags |= RTCF_DIRECTSRC;
 	}
@@ -2272,8 +2275,10 @@ e_nobufs:
 	goto done;
 
 martian_source:
+	err = -EINVAL;
+martian_source_keep_err:
 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-	goto e_inval;
+	goto done;
 }
 
 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-- 
cgit v1.2.3-58-ga151


From 96d362202bfc0e5da78ee59b1645296fbca515f4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 2 Jun 2010 19:21:31 +0000
Subject: ipv4: RCU conversion of ip_route_input_slow/ip_route_input_mc

Avoid two atomic ops on struct in_device refcount per incoming packet,
if slow path taken, (or route cache disabled)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d377b45005fc..1cfe0d199e7c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1843,13 +1843,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 	rt->rt_type = res->type;
 }
 
+/* called in rcu_read_lock() section */
 static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 				u8 tos, struct net_device *dev, int our)
 {
-	unsigned hash;
+	unsigned int hash;
 	struct rtable *rth;
 	__be32 spec_dst;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	u32 itag = 0;
 	int err;
 
@@ -1914,18 +1915,14 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
 
-	in_dev_put(in_dev);
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	return rt_intern_hash(hash, rth, NULL, skb, dev->ifindex);
 
 e_nobufs:
-	in_dev_put(in_dev);
 	return -ENOBUFS;
-
 e_inval:
-	err = -EINVAL;
+	return -EINVAL;
 e_err:
-	in_dev_put(in_dev);
 	return err;
 }
 
@@ -2101,7 +2098,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 			       u8 tos, struct net_device *dev)
 {
 	struct fib_result res;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct flowi fl = { .nl_u = { .ip4_u =
 				      { .daddr = daddr,
 					.saddr = saddr,
@@ -2179,7 +2176,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
 done:
-	in_dev_put(in_dev);
 	if (free_res)
 		fib_res_put(&res);
 out:	return err;
@@ -2288,16 +2284,18 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	unsigned	hash;
 	int iif = dev->ifindex;
 	struct net *net;
+	int res;
 
 	net = dev_net(dev);
 
+	rcu_read_lock();
+
 	if (!rt_caching(net))
 		goto skip_cache;
 
 	tos &= IPTOS_RT_MASK;
 	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
-	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference(rth->u.dst.rt_next)) {
 		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
@@ -2321,7 +2319,6 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		}
 		RT_CACHE_STAT_INC(in_hlist_search);
 	}
-	rcu_read_unlock();
 
 skip_cache:
 	/* Multicast recognition logic is moved from route cache to here.
@@ -2336,12 +2333,11 @@ skip_cache:
 	   route cache entry is created eventually.
 	 */
 	if (ipv4_is_multicast(daddr)) {
-		struct in_device *in_dev;
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
 
-		rcu_read_lock();
-		if ((in_dev = __in_dev_get_rcu(dev)) != NULL) {
+		if (in_dev) {
 			int our = ip_check_mc(in_dev, daddr, saddr,
-				ip_hdr(skb)->protocol);
+					      ip_hdr(skb)->protocol);
 			if (our
 #ifdef CONFIG_IP_MROUTE
 				||
@@ -2349,15 +2345,18 @@ skip_cache:
 			     IN_DEV_MFORWARD(in_dev))
 #endif
 			   ) {
+				int res = ip_route_input_mc(skb, daddr, saddr,
+							    tos, dev, our);
 				rcu_read_unlock();
-				return ip_route_input_mc(skb, daddr, saddr,
-							 tos, dev, our);
+				return res;
 			}
 		}
 		rcu_read_unlock();
 		return -EINVAL;
 	}
-	return ip_route_input_slow(skb, daddr, saddr, tos, dev);
+	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
+	rcu_read_unlock();
+	return res;
 }
 EXPORT_SYMBOL(ip_route_input_common);
 
-- 
cgit v1.2.3-58-ga151


From bc10502dba37d3b210efd9f3867212298f13b78e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 03:21:52 -0700
Subject: net: use __packed annotation

cleanup patch.

Use new __packed annotation in net/ and include/
(except netfilter)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h   |  2 +-
 include/linux/if_fddi.h    |  8 ++---
 include/linux/if_frad.h    |  2 +-
 include/linux/if_hippi.h   |  8 ++---
 include/linux/if_pppox.h   |  8 ++---
 include/linux/ipv6.h       |  4 +--
 include/linux/isdnif.h     |  2 +-
 include/linux/mISDNif.h    |  2 +-
 include/linux/nbd.h        |  2 +-
 include/linux/ncp.h        | 10 +++---
 include/linux/ncp_fs_sb.h  | 14 ++++----
 include/linux/phonet.h     |  4 +--
 include/linux/rds.h        | 12 +++----
 include/linux/sctp.h       | 80 +++++++++++++++++++++++-----------------------
 include/linux/wlp.h        | 22 ++++++-------
 include/net/dn_dev.h       |  8 ++---
 include/net/dn_nsp.h       | 16 +++++-----
 include/net/ip6_tunnel.h   |  2 +-
 include/net/ipx.h          |  8 ++---
 include/net/mip6.h         |  2 +-
 include/net/ndisc.h        |  2 +-
 include/net/sctp/structs.h |  4 +--
 include/rxrpc/packet.h     |  8 ++---
 net/bluetooth/bnep/bnep.h  |  8 ++---
 net/compat.c               |  6 ++--
 net/iucv/iucv.c            | 14 ++++----
 net/mac80211/cfg.c         |  2 +-
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/rx.c          |  2 +-
 net/sctp/sm_make_chunk.c   |  2 +-
 30 files changed, 133 insertions(+), 133 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index bed7a4682b90..c831467774d0 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -119,7 +119,7 @@ struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
 	__be16		h_proto;		/* packet type ID field	*/
-} __attribute__((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h
index 5459c5c09930..9947c39e62f6 100644
--- a/include/linux/if_fddi.h
+++ b/include/linux/if_fddi.h
@@ -67,7 +67,7 @@ struct fddi_8022_1_hdr {
 	__u8	dsap;					/* destination service access point */
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl;					/* control byte #1 */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define 802.2 Type 2 header */
 struct fddi_8022_2_hdr {
@@ -75,7 +75,7 @@ struct fddi_8022_2_hdr {
 	__u8	ssap;					/* source service access point */
 	__u8	ctrl_1;					/* control byte #1 */
 	__u8	ctrl_2;					/* control byte #2 */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define 802.2 SNAP header */
 #define FDDI_K_OUI_LEN	3
@@ -85,7 +85,7 @@ struct fddi_snap_hdr {
 	__u8	ctrl;					/* always 0x03 */
 	__u8	oui[FDDI_K_OUI_LEN];	/* organizational universal id */
 	__be16	ethertype;				/* packet type ID field */
-} __attribute__ ((packed));
+} __packed;
 
 /* Define FDDI LLC frame header */
 struct fddihdr {
@@ -98,7 +98,7 @@ struct fddihdr {
 		struct fddi_8022_2_hdr		llc_8022_2;
 		struct fddi_snap_hdr		llc_snap;
 		} hdr;
-} __attribute__ ((packed));
+} __packed;
 
 #ifdef __KERNEL__
 #include <linux/netdevice.h>
diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h
index 80b3a1056a5f..191ee0869bc1 100644
--- a/include/linux/if_frad.h
+++ b/include/linux/if_frad.h
@@ -135,7 +135,7 @@ struct frhdr
    __be16 PID;
 
 #define IP_NLPID pad 
-} __attribute__((packed));
+} __packed;
 
 /* see RFC 1490 for the definition of the following */
 #define FRAD_I_UI		0x03
diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h
index 8d038eb8db5c..5fe5f307c6f5 100644
--- a/include/linux/if_hippi.h
+++ b/include/linux/if_hippi.h
@@ -104,7 +104,7 @@ struct hippi_fp_hdr {
 	__be32		fixed;
 #endif
 	__be32		d2_size;
-} __attribute__ ((packed));
+} __packed;
 
 struct hippi_le_hdr {
 #if defined (__BIG_ENDIAN_BITFIELD)
@@ -129,7 +129,7 @@ struct hippi_le_hdr {
 	__u8		daddr[HIPPI_ALEN];
 	__u16		locally_administered;
 	__u8		saddr[HIPPI_ALEN];
-} __attribute__ ((packed));
+} __packed;
 
 #define HIPPI_OUI_LEN	3
 /*
@@ -142,12 +142,12 @@ struct hippi_snap_hdr {
 	__u8	ctrl;			/* always 0x03 */
 	__u8	oui[HIPPI_OUI_LEN];	/* organizational universal id (zero)*/
 	__be16	ethertype;		/* packet type ID field */
-} __attribute__ ((packed));
+} __packed;
 
 struct hippi_hdr {
 	struct hippi_fp_hdr	fp;
 	struct hippi_le_hdr	le;
 	struct hippi_snap_hdr	snap;
-} __attribute__ ((packed));
+} __packed;
 
 #endif	/* _LINUX_IF_HIPPI_H */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index a6577af0c4e6..1925e0c3f162 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -59,7 +59,7 @@ struct sockaddr_pppox {
        union{ 
                struct pppoe_addr       pppoe; 
        }sa_addr; 
-}__attribute__ ((packed)); 
+} __packed;
 
 /* The use of the above union isn't viable because the size of this
  * struct must stay fixed over time -- applications use sizeof(struct
@@ -70,7 +70,7 @@ struct sockaddr_pppol2tp {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tp_addr pppol2tp;
-}__attribute__ ((packed));
+} __packed;
 
 /* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
  * bits. So we need a different sockaddr structure.
@@ -79,7 +79,7 @@ struct sockaddr_pppol2tpv3 {
 	sa_family_t     sa_family;      /* address family, AF_PPPOX */
 	unsigned int    sa_protocol;    /* protocol identifier */
 	struct pppol2tpv3_addr pppol2tp;
-} __attribute__ ((packed));
+} __packed;
 
 /*********************************************************************
  *
@@ -129,7 +129,7 @@ struct pppoe_hdr {
 	__be16 sid;
 	__be16 length;
 	struct pppoe_tag tag[0];
-} __attribute__ ((packed));
+} __packed;
 
 /* Length of entire PPPoE + PPP header */
 #define PPPOE_SES_HLEN	8
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 99e1ab7e3eec..940e21595351 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -58,7 +58,7 @@ struct ipv6_opt_hdr {
 	/* 
 	 * TLV encoded option data follows.
 	 */
-} __attribute__ ((packed));	/* required for some archs */
+} __packed;	/* required for some archs */
 
 #define ipv6_destopt_hdr ipv6_opt_hdr
 #define ipv6_hopopt_hdr  ipv6_opt_hdr
@@ -99,7 +99,7 @@ struct ipv6_destopt_hao {
 	__u8			type;
 	__u8			length;
 	struct in6_addr		addr;
-} __attribute__ ((__packed__));
+} __packed;
 
 /*
  *	IPv6 fixed header
diff --git a/include/linux/isdnif.h b/include/linux/isdnif.h
index b9b5a684ed69..b8c23f88dd54 100644
--- a/include/linux/isdnif.h
+++ b/include/linux/isdnif.h
@@ -317,7 +317,7 @@ typedef struct T30_s {
 	__u8 r_scantime;
 	__u8 r_id[FAXIDLEN];
 	__u8 r_code;
-} __attribute__((packed)) T30_s;
+} __packed T30_s;
 
 #define ISDN_TTY_FAX_CONN_IN	0
 #define ISDN_TTY_FAX_CONN_OUT	1
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 78c3bed1c3f5..b5e7f2202484 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -251,7 +251,7 @@
 struct mISDNhead {
 	unsigned int	prim;
 	unsigned int	id;
-}  __attribute__((packed));
+}  __packed;
 
 #define MISDN_HEADER_LEN	sizeof(struct mISDNhead)
 #define MAX_DATA_SIZE		2048
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index 155719dab813..bb58854a8061 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -88,7 +88,7 @@ struct nbd_request {
 	char handle[8];
 	__be64 from;
 	__be32 len;
-} __attribute__ ((packed));
+} __packed;
 
 /*
  * This is the reply packet that nbd-server sends back to the client after
diff --git a/include/linux/ncp.h b/include/linux/ncp.h
index 99f0adeeb3f3..3ace8370e61e 100644
--- a/include/linux/ncp.h
+++ b/include/linux/ncp.h
@@ -27,7 +27,7 @@ struct ncp_request_header {
 	__u8 conn_high;
 	__u8 function;
 	__u8 data[0];
-} __attribute__((packed));
+} __packed;
 
 #define NCP_REPLY                (0x3333)
 #define NCP_WATCHDOG		 (0x3E3E)
@@ -42,7 +42,7 @@ struct ncp_reply_header {
 	__u8 completion_code;
 	__u8 connection_state;
 	__u8 data[0];
-} __attribute__((packed));
+} __packed;
 
 #define NCP_VOLNAME_LEN (16)
 #define NCP_NUMBER_OF_VOLUMES (256)
@@ -158,7 +158,7 @@ struct nw_info_struct {
 #ifdef __KERNEL__
 	struct nw_nfs_info nfs;
 #endif
-} __attribute__((packed));
+} __packed;
 
 /* modify mask - use with MODIFY_DOS_INFO structure */
 #define DM_ATTRIBUTES		  (cpu_to_le32(0x02))
@@ -190,12 +190,12 @@ struct nw_modify_dos_info {
 	__u16 inheritanceGrantMask;
 	__u16 inheritanceRevokeMask;
 	__u32 maximumSpace;
-} __attribute__((packed));
+} __packed;
 
 struct nw_search_sequence {
 	__u8 volNumber;
 	__u32 dirBase;
 	__u32 sequence;
-} __attribute__((packed));
+} __packed;
 
 #endif				/* _LINUX_NCP_H */
diff --git a/include/linux/ncp_fs_sb.h b/include/linux/ncp_fs_sb.h
index 5ec9ca671687..8da05bc098ca 100644
--- a/include/linux/ncp_fs_sb.h
+++ b/include/linux/ncp_fs_sb.h
@@ -104,13 +104,13 @@ struct ncp_server {
 
 		unsigned int state;		/* STREAM only: receiver state */
 		struct {
-			__u32 magic __attribute__((packed));
-			__u32 len __attribute__((packed));
-			__u16 type __attribute__((packed));
-			__u16 p1 __attribute__((packed));
-			__u16 p2 __attribute__((packed));
-			__u16 p3 __attribute__((packed));
-			__u16 type2 __attribute__((packed));
+			__u32 magic __packed;
+			__u32 len __packed;
+			__u16 type __packed;
+			__u16 p1 __packed;
+			__u16 p2 __packed;
+			__u16 p3 __packed;
+			__u16 type2 __packed;
 		} buf;				/* STREAM only: temporary buffer */
 		unsigned char* ptr;		/* STREAM only: pointer to data */
 		size_t len;			/* STREAM only: length of data to receive */
diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index e5126cff9b2a..24426c3d6b5a 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -56,7 +56,7 @@ struct phonethdr {
 	__be16	pn_length;
 	__u8	pn_robj;
 	__u8	pn_sobj;
-} __attribute__((packed));
+} __packed;
 
 /* Common Phonet payload header */
 struct phonetmsg {
@@ -98,7 +98,7 @@ struct sockaddr_pn {
 	__u8 spn_dev;
 	__u8 spn_resource;
 	__u8 spn_zero[sizeof(struct sockaddr) - sizeof(sa_family_t) - 3];
-} __attribute__ ((packed));
+} __packed;
 
 /* Well known address */
 #define PN_DEV_PC	0x10
diff --git a/include/linux/rds.h b/include/linux/rds.h
index cab4994c2f63..24bce3ded9ea 100644
--- a/include/linux/rds.h
+++ b/include/linux/rds.h
@@ -100,7 +100,7 @@
 struct rds_info_counter {
 	u_int8_t	name[32];
 	u_int64_t	value;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_INFO_CONNECTION_FLAG_SENDING	0x01
 #define RDS_INFO_CONNECTION_FLAG_CONNECTING	0x02
@@ -115,7 +115,7 @@ struct rds_info_connection {
 	__be32		faddr;
 	u_int8_t	transport[TRANSNAMSIZ];		/* null term ascii */
 	u_int8_t	flags;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_flow {
 	__be32		laddr;
@@ -123,7 +123,7 @@ struct rds_info_flow {
 	u_int32_t	bytes;
 	__be16		lport;
 	__be16		fport;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_INFO_MESSAGE_FLAG_ACK               0x01
 #define RDS_INFO_MESSAGE_FLAG_FAST_ACK          0x02
@@ -136,7 +136,7 @@ struct rds_info_message {
 	__be16		lport;
 	__be16		fport;
 	u_int8_t	flags;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_socket {
 	u_int32_t	sndbuf;
@@ -146,7 +146,7 @@ struct rds_info_socket {
 	__be16		connected_port;
 	u_int32_t	rcvbuf;
 	u_int64_t	inum;
-} __attribute__((packed));
+} __packed;
 
 struct rds_info_tcp_socket {
 	__be32          local_addr;
@@ -158,7 +158,7 @@ struct rds_info_tcp_socket {
 	u_int32_t       last_sent_nxt;
 	u_int32_t       last_expected_una;
 	u_int32_t       last_seen_una;
-} __attribute__((packed));
+} __packed;
 
 #define RDS_IB_GID_LEN	16
 struct rds_info_rdma_connection {
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index c20d3ce673c0..c11a28706fa4 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -61,7 +61,7 @@ typedef struct sctphdr {
 	__be16 dest;
 	__be32 vtag;
 	__le32 checksum;
-} __attribute__((packed)) sctp_sctphdr_t;
+} __packed sctp_sctphdr_t;
 
 #ifdef __KERNEL__
 #include <linux/skbuff.h>
@@ -77,7 +77,7 @@ typedef struct sctp_chunkhdr {
 	__u8 type;
 	__u8 flags;
 	__be16 length;
-} __attribute__((packed)) sctp_chunkhdr_t;
+} __packed sctp_chunkhdr_t;
 
 
 /* Section 3.2.  Chunk Type Values.
@@ -167,7 +167,7 @@ enum { SCTP_CHUNK_FLAG_T = 0x01 };
 typedef struct sctp_paramhdr {
 	__be16 type;
 	__be16 length;
-} __attribute__((packed)) sctp_paramhdr_t;
+} __packed sctp_paramhdr_t;
 
 typedef enum {
 
@@ -228,12 +228,12 @@ typedef struct sctp_datahdr {
 	__be16 ssn;
 	__be32 ppid;
 	__u8  payload[0];
-} __attribute__((packed)) sctp_datahdr_t;
+} __packed sctp_datahdr_t;
 
 typedef struct sctp_data_chunk {
         sctp_chunkhdr_t chunk_hdr;
         sctp_datahdr_t  data_hdr;
-} __attribute__((packed)) sctp_data_chunk_t;
+} __packed sctp_data_chunk_t;
 
 /* DATA Chuck Specific Flags */
 enum {
@@ -259,78 +259,78 @@ typedef struct sctp_inithdr {
 	__be16 num_inbound_streams;
 	__be32 initial_tsn;
 	__u8  params[0];
-} __attribute__((packed)) sctp_inithdr_t;
+} __packed sctp_inithdr_t;
 
 typedef struct sctp_init_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_inithdr_t init_hdr;
-} __attribute__((packed)) sctp_init_chunk_t;
+} __packed sctp_init_chunk_t;
 
 
 /* Section 3.3.2.1. IPv4 Address Parameter (5) */
 typedef struct sctp_ipv4addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in_addr  addr;
-} __attribute__((packed)) sctp_ipv4addr_param_t;
+} __packed sctp_ipv4addr_param_t;
 
 /* Section 3.3.2.1. IPv6 Address Parameter (6) */
 typedef struct sctp_ipv6addr_param {
 	sctp_paramhdr_t param_hdr;
 	struct in6_addr addr;
-} __attribute__((packed)) sctp_ipv6addr_param_t;
+} __packed sctp_ipv6addr_param_t;
 
 /* Section 3.3.2.1 Cookie Preservative (9) */
 typedef struct sctp_cookie_preserve_param {
 	sctp_paramhdr_t param_hdr;
 	__be32          lifespan_increment;
-} __attribute__((packed)) sctp_cookie_preserve_param_t;
+} __packed sctp_cookie_preserve_param_t;
 
 /* Section 3.3.2.1 Host Name Address (11) */
 typedef struct sctp_hostname_param {
 	sctp_paramhdr_t param_hdr;
 	uint8_t hostname[0];
-} __attribute__((packed)) sctp_hostname_param_t;
+} __packed sctp_hostname_param_t;
 
 /* Section 3.3.2.1 Supported Address Types (12) */
 typedef struct sctp_supported_addrs_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 types[0];
-} __attribute__((packed)) sctp_supported_addrs_param_t;
+} __packed sctp_supported_addrs_param_t;
 
 /* Appendix A. ECN Capable (32768) */
 typedef struct sctp_ecn_capable_param {
 	sctp_paramhdr_t param_hdr;
-} __attribute__((packed)) sctp_ecn_capable_param_t;
+} __packed sctp_ecn_capable_param_t;
 
 /* ADDIP Section 3.2.6 Adaptation Layer Indication */
 typedef struct sctp_adaptation_ind_param {
 	struct sctp_paramhdr param_hdr;
 	__be32 adaptation_ind;
-} __attribute__((packed)) sctp_adaptation_ind_param_t;
+} __packed sctp_adaptation_ind_param_t;
 
 /* ADDIP Section 4.2.7 Supported Extensions Parameter */
 typedef struct sctp_supported_ext_param {
 	struct sctp_paramhdr param_hdr;
 	__u8 chunks[0];
-} __attribute__((packed)) sctp_supported_ext_param_t;
+} __packed sctp_supported_ext_param_t;
 
 /* AUTH Section 3.1 Random */
 typedef struct sctp_random_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 random_val[0];
-} __attribute__((packed)) sctp_random_param_t;
+} __packed sctp_random_param_t;
 
 /* AUTH Section 3.2 Chunk List */
 typedef struct sctp_chunks_param {
 	sctp_paramhdr_t param_hdr;
 	__u8 chunks[0];
-} __attribute__((packed)) sctp_chunks_param_t;
+} __packed sctp_chunks_param_t;
 
 /* AUTH Section 3.3 HMAC Algorithm */
 typedef struct sctp_hmac_algo_param {
 	sctp_paramhdr_t param_hdr;
 	__be16 hmac_ids[0];
-} __attribute__((packed)) sctp_hmac_algo_param_t;
+} __packed sctp_hmac_algo_param_t;
 
 /* RFC 2960.  Section 3.3.3 Initiation Acknowledgement (INIT ACK) (2):
  *   The INIT ACK chunk is used to acknowledge the initiation of an SCTP
@@ -342,13 +342,13 @@ typedef sctp_init_chunk_t sctp_initack_chunk_t;
 typedef struct sctp_cookie_param {
 	sctp_paramhdr_t p;
 	__u8 body[0];
-} __attribute__((packed)) sctp_cookie_param_t;
+} __packed sctp_cookie_param_t;
 
 /* Section 3.3.3.1 Unrecognized Parameters (8) */
 typedef struct sctp_unrecognized_param {
 	sctp_paramhdr_t param_hdr;
 	sctp_paramhdr_t unrecognized;
-} __attribute__((packed)) sctp_unrecognized_param_t;
+} __packed sctp_unrecognized_param_t;
 
 
@@ -363,7 +363,7 @@ typedef struct sctp_unrecognized_param {
 typedef struct sctp_gap_ack_block {
 	__be16 start;
 	__be16 end;
-} __attribute__((packed)) sctp_gap_ack_block_t;
+} __packed sctp_gap_ack_block_t;
 
 typedef __be32 sctp_dup_tsn_t;
 
@@ -378,12 +378,12 @@ typedef struct sctp_sackhdr {
 	__be16 num_gap_ack_blocks;
 	__be16 num_dup_tsns;
 	sctp_sack_variable_t variable[0];
-} __attribute__((packed)) sctp_sackhdr_t;
+} __packed sctp_sackhdr_t;
 
 typedef struct sctp_sack_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_sackhdr_t sack_hdr;
-} __attribute__((packed)) sctp_sack_chunk_t;
+} __packed sctp_sack_chunk_t;
 
 
 /* RFC 2960.  Section 3.3.5 Heartbeat Request (HEARTBEAT) (4):
@@ -395,12 +395,12 @@ typedef struct sctp_sack_chunk {
 
 typedef struct sctp_heartbeathdr {
 	sctp_paramhdr_t info;
-} __attribute__((packed)) sctp_heartbeathdr_t;
+} __packed sctp_heartbeathdr_t;
 
 typedef struct sctp_heartbeat_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_heartbeathdr_t hb_hdr;
-} __attribute__((packed)) sctp_heartbeat_chunk_t;
+} __packed sctp_heartbeat_chunk_t;
 
 
 /* For the abort and shutdown ACK we must carry the init tag in the
@@ -409,7 +409,7 @@ typedef struct sctp_heartbeat_chunk {
  */
 typedef struct sctp_abort_chunk {
         sctp_chunkhdr_t uh;
-} __attribute__((packed)) sctp_abort_chunk_t;
+} __packed sctp_abort_chunk_t;
 
 
 /* For the graceful shutdown we must carry the tag (in common header)
@@ -417,12 +417,12 @@ typedef struct sctp_abort_chunk {
  */
 typedef struct sctp_shutdownhdr {
 	__be32 cum_tsn_ack;
-} __attribute__((packed)) sctp_shutdownhdr_t;
+} __packed sctp_shutdownhdr_t;
 
 struct sctp_shutdown_chunk_t {
         sctp_chunkhdr_t    chunk_hdr;
         sctp_shutdownhdr_t shutdown_hdr;
-} __attribute__ ((packed));
+} __packed;
 
 /* RFC 2960.  Section 3.3.10 Operation Error (ERROR) (9) */
 
@@ -430,12 +430,12 @@ typedef struct sctp_errhdr {
 	__be16 cause;
 	__be16 length;
 	__u8  variable[0];
-} __attribute__((packed)) sctp_errhdr_t;
+} __packed sctp_errhdr_t;
 
 typedef struct sctp_operr_chunk {
         sctp_chunkhdr_t chunk_hdr;
 	sctp_errhdr_t   err_hdr;
-} __attribute__((packed)) sctp_operr_chunk_t;
+} __packed sctp_operr_chunk_t;
 
 /* RFC 2960 3.3.10 - Operation Error
  *
@@ -525,7 +525,7 @@ typedef struct sctp_ecnehdr {
 typedef struct sctp_ecne_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_ecnehdr_t ence_hdr;
-} __attribute__((packed)) sctp_ecne_chunk_t;
+} __packed sctp_ecne_chunk_t;
 
 /* RFC 2960.  Appendix A.  Explicit Congestion Notification.
  *   Congestion Window Reduced (CWR) (13)
@@ -537,7 +537,7 @@ typedef struct sctp_cwrhdr {
 typedef struct sctp_cwr_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_cwrhdr_t cwr_hdr;
-} __attribute__((packed)) sctp_cwr_chunk_t;
+} __packed sctp_cwr_chunk_t;
 
 /* PR-SCTP
  * 3.2 Forward Cumulative TSN Chunk Definition (FORWARD TSN)
@@ -588,17 +588,17 @@ typedef struct sctp_cwr_chunk {
 struct sctp_fwdtsn_skip {
 	__be16 stream;
 	__be16 ssn;
-} __attribute__((packed));
+} __packed;
 
 struct sctp_fwdtsn_hdr {
 	__be32 new_cum_tsn;
 	struct sctp_fwdtsn_skip skip[0];
-} __attribute((packed));
+} __packed;
 
 struct sctp_fwdtsn_chunk {
 	struct sctp_chunkhdr chunk_hdr;
 	struct sctp_fwdtsn_hdr fwdtsn_hdr;
-} __attribute((packed));
+} __packed;
 
 
 /* ADDIP
@@ -636,17 +636,17 @@ struct sctp_fwdtsn_chunk {
 typedef struct sctp_addip_param {
 	sctp_paramhdr_t	param_hdr;
 	__be32		crr_id;
-} __attribute__((packed)) sctp_addip_param_t;
+} __packed sctp_addip_param_t;
 
 typedef struct sctp_addiphdr {
 	__be32	serial;
 	__u8	params[0];
-} __attribute__((packed)) sctp_addiphdr_t;
+} __packed sctp_addiphdr_t;
 
 typedef struct sctp_addip_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_addiphdr_t addip_hdr;
-} __attribute__((packed)) sctp_addip_chunk_t;
+} __packed sctp_addip_chunk_t;
 
 /* AUTH
  * Section 4.1  Authentication Chunk (AUTH)
@@ -701,11 +701,11 @@ typedef struct sctp_authhdr {
 	__be16 shkey_id;
 	__be16 hmac_id;
 	__u8   hmac[0];
-} __attribute__((packed)) sctp_authhdr_t;
+} __packed sctp_authhdr_t;
 
 typedef struct sctp_auth_chunk {
 	sctp_chunkhdr_t chunk_hdr;
 	sctp_authhdr_t auth_hdr;
-} __attribute__((packed)) sctp_auth_chunk_t;
+} __packed sctp_auth_chunk_t;
 
 #endif /* __LINUX_SCTP_H__ */
diff --git a/include/linux/wlp.h b/include/linux/wlp.h
index ac95ce6606ac..c76fe2392506 100644
--- a/include/linux/wlp.h
+++ b/include/linux/wlp.h
@@ -300,7 +300,7 @@ struct wlp_ie {
 	__le16 cycle_param;
 	__le16 acw_anchor_addr;
 	u8 wssid_hash_list[];
-} __attribute__((packed));
+} __packed;
 
 static inline int wlp_ie_hash_length(struct wlp_ie *ie)
 {
@@ -324,7 +324,7 @@ static inline void wlp_ie_set_hash_length(struct wlp_ie *ie, int hash_length)
  */
 struct wlp_nonce {
 	u8 data[16];
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP UUID
@@ -336,7 +336,7 @@ struct wlp_nonce {
  */
 struct wlp_uuid {
 	u8 data[16];
-} __attribute__((packed));
+} __packed;
 
 
 /**
@@ -348,7 +348,7 @@ struct wlp_dev_type {
 	u8 OUI[3];
 	u8 OUIsubdiv;
 	__le16 subID;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP frame header
@@ -357,7 +357,7 @@ struct wlp_dev_type {
 struct wlp_frame_hdr {
 	__le16 mux_hdr;			/* WLP_PROTOCOL_ID */
 	enum wlp_frame_type type:8;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP attribute field header
@@ -368,7 +368,7 @@ struct wlp_frame_hdr {
 struct wlp_attr_hdr {
 	__le16 type;
 	__le16 length;
-} __attribute__((packed));
+} __packed;
 
 /**
  * Device information commonly used together
@@ -401,13 +401,13 @@ struct wlp_device_info {
 struct wlp_attr_##name {						\
 	struct wlp_attr_hdr hdr;					\
 	type name;							\
-} __attribute__((packed));
+} __packed;
 
 #define wlp_attr_array(type, name)					\
 struct wlp_attr_##name {						\
 	struct wlp_attr_hdr hdr;					\
 	type name[];							\
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP association attribute fields
@@ -483,7 +483,7 @@ struct wlp_wss_info {
 	struct wlp_attr_accept_enrl accept;
 	struct wlp_attr_wss_sec_status sec_stat;
 	struct wlp_attr_wss_bcast bcast;
-} __attribute__((packed));
+} __packed;
 
 /* WLP WSS Information */
 wlp_attr_array(struct wlp_wss_info, wss_info)
@@ -520,7 +520,7 @@ wlp_attr(u8, wlp_assc_err)
 struct wlp_frame_std_abbrv_hdr {
 	struct wlp_frame_hdr hdr;
 	u8 tag;
-} __attribute__((packed));
+} __packed;
 
 /**
  * WLP association frames
@@ -533,7 +533,7 @@ struct wlp_frame_assoc {
 	struct wlp_attr_version version;
 	struct wlp_attr_msg_type msg_type;
 	u8 attr[];
-} __attribute__((packed));
+} __packed;
 
 /* Ethernet to dev address mapping */
 struct wlp_eda {
diff --git a/include/net/dn_dev.h b/include/net/dn_dev.h
index 511a459ec10f..0916bbf3bdff 100644
--- a/include/net/dn_dev.h
+++ b/include/net/dn_dev.h
@@ -101,7 +101,7 @@ struct dn_short_packet {
 	__le16 dstnode;
 	__le16 srcnode;
 	__u8   forward;
-} __attribute__((packed));
+} __packed;
 
 struct dn_long_packet {
 	__u8   msgflg;
@@ -115,7 +115,7 @@ struct dn_long_packet {
 	__u8   visit_ct;
 	__u8   s_class;
 	__u8   pt;
-} __attribute__((packed));
+} __packed;
 
 /*------------------------- DRP - Routing messages ---------------------*/
 
@@ -132,7 +132,7 @@ struct endnode_hello_message {
 	__u8   mpd;
 	__u8   datalen;
 	__u8   data[2];
-} __attribute__((packed));
+} __packed;
 
 struct rtnode_hello_message {
 	__u8   msgflg;
@@ -144,7 +144,7 @@ struct rtnode_hello_message {
 	__u8   area;
 	__le16  timer;
 	__u8   mpd;
-} __attribute__((packed));
+} __packed;
 
 
 extern void dn_dev_init(void);
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
index 17d43d2db5ec..e43a2893f132 100644
--- a/include/net/dn_nsp.h
+++ b/include/net/dn_nsp.h
@@ -74,18 +74,18 @@ struct nsp_data_seg_msg {
 	__u8   msgflg;
 	__le16 dstaddr;
 	__le16 srcaddr;
-} __attribute__((packed));
+} __packed;
 
 struct nsp_data_opt_msg {
 	__le16 acknum;
 	__le16 segnum;
 	__le16 lsflgs;
-} __attribute__((packed));
+} __packed;
 
 struct nsp_data_opt_msg1 {
 	__le16 acknum;
 	__le16 segnum;
-} __attribute__((packed));
+} __packed;
 
 
 /* Acknowledgment Message (data/other data)                             */
@@ -94,13 +94,13 @@ struct nsp_data_ack_msg {
 	__le16 dstaddr;
 	__le16 srcaddr;
 	__le16 acknum;
-} __attribute__((packed));
+} __packed;
 
 /* Connect Acknowledgment Message */
 struct  nsp_conn_ack_msg {
 	__u8 msgflg;
 	__le16 dstaddr;
-} __attribute__((packed));
+} __packed;
 
 
 /* Connect Initiate/Retransmit Initiate/Connect Confirm */
@@ -117,7 +117,7 @@ struct  nsp_conn_init_msg {
 #define NSP_FC_MASK   0x0c            /* FC type mask         */
 	__u8   info;
 	__le16 segsize;
-} __attribute__((packed));
+} __packed;
 
 /* Disconnect Initiate/Disconnect Confirm */
 struct  nsp_disconn_init_msg {
@@ -125,7 +125,7 @@ struct  nsp_disconn_init_msg {
 	__le16 dstaddr;
 	__le16 srcaddr;
 	__le16 reason;
-} __attribute__((packed));
+} __packed;
 
 
@@ -135,7 +135,7 @@ struct  srcobj_fmt {
 	__le16 grpcode;
 	__le16 usrcode;
 	__u8   dlen;
-} __attribute__((packed));
+} __packed;
 
 /*
  * A collection of functions for manipulating the sequence
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index fbf9d1cda27b..fc94ec568a50 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -27,6 +27,6 @@ struct ipv6_tlv_tnl_enc_lim {
 	__u8 type;		/* type-code for option         */
 	__u8 length;		/* option length                */
 	__u8 encap_limit;	/* tunnel encapsulation limit   */
-} __attribute__ ((packed));
+} __packed;
 
 #endif
diff --git a/include/net/ipx.h b/include/net/ipx.h
index ef51a668ba19..05d7e4a88b49 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -27,9 +27,9 @@ struct ipx_address {
 #define IPX_MAX_PPROP_HOPS 8
 
 struct ipxhdr {
-	__be16			ipx_checksum __attribute__ ((packed));
+	__be16			ipx_checksum __packed;
 #define IPX_NO_CHECKSUM	cpu_to_be16(0xFFFF)
-	__be16			ipx_pktsize __attribute__ ((packed));
+	__be16			ipx_pktsize __packed;
 	__u8			ipx_tctrl;
 	__u8			ipx_type;
 #define IPX_TYPE_UNKNOWN	0x00
@@ -38,8 +38,8 @@ struct ipxhdr {
 #define IPX_TYPE_SPX		0x05	/* SPX protocol */
 #define IPX_TYPE_NCP		0x11	/* $lots for docs on this (SPIT) */
 #define IPX_TYPE_PPROP		0x14	/* complicated flood fill brdcast */
-	struct ipx_address	ipx_dest __attribute__ ((packed));
-	struct ipx_address	ipx_source __attribute__ ((packed));
+	struct ipx_address	ipx_dest __packed;
+	struct ipx_address	ipx_source __packed;
 };
 
 static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
diff --git a/include/net/mip6.h b/include/net/mip6.h
index a83ad1982a90..26ba99b5a4b1 100644
--- a/include/net/mip6.h
+++ b/include/net/mip6.h
@@ -39,7 +39,7 @@ struct ip6_mh {
 	__u16	ip6mh_cksum;
 	/* Followed by type specific messages */
 	__u8	data[0];
-} __attribute__ ((__packed__));
+} __packed;
 
 #define IP6_MH_TYPE_BRR		0   /* Binding Refresh Request */
 #define IP6_MH_TYPE_HOTI	1   /* HOTI Message   */
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index f76f22d05721..895997bc2ead 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -82,7 +82,7 @@ struct ra_msg {
 struct nd_opt_hdr {
 	__u8		nd_opt_type;
 	__u8		nd_opt_len;
-} __attribute__((__packed__));
+} __packed;
 
 
 extern int			ndisc_init(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 4b860116e096..f9e7473613bd 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -443,7 +443,7 @@ struct sctp_signed_cookie {
 	__u8 signature[SCTP_SECRET_SIZE];
 	__u32 __pad;		/* force sctp_cookie alignment to 64 bits */
 	struct sctp_cookie c;
-} __attribute__((packed));
+} __packed;
 
 /* This is another convenience type to allocate memory for address
  * params for the maximum size and pass such structures around
@@ -488,7 +488,7 @@ typedef struct sctp_sender_hb_info {
 	union sctp_addr daddr;
 	unsigned long sent_at;
 	__u64 hb_nonce;
-} __attribute__((packed)) sctp_sender_hb_info_t;
+} __packed sctp_sender_hb_info_t;
 
 /*
  *  RFC 2960 1.3.2 Sequenced Delivery within Streams
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index b69e6e173ea1..9b2c30897e50 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -65,7 +65,7 @@ struct rxrpc_header {
 	};
 	__be16		serviceId;	/* service ID */
 
-} __attribute__((packed));
+} __packed;
 
 #define __rxrpc_header_off(X) offsetof(struct rxrpc_header,X)
 
@@ -120,7 +120,7 @@ struct rxrpc_ackpacket {
 #define RXRPC_ACK_TYPE_NACK		0
 #define RXRPC_ACK_TYPE_ACK		1
 
-} __attribute__((packed));
+} __packed;
 
 /*
  * ACK packets can have a further piece of information tagged on the end
@@ -141,7 +141,7 @@ struct rxkad_challenge {
 	__be32		nonce;		/* encrypted random number */
 	__be32		min_level;	/* minimum security level */
 	__be32		__padding;	/* padding to 8-byte boundary */
-} __attribute__((packed));
+} __packed;
 
 /*****************************************************************************/
 /*
@@ -164,7 +164,7 @@ struct rxkad_response {
 
 	__be32		kvno;		/* Kerberos key version number */
 	__be32		ticket_len;	/* Kerberos ticket length  */
-} __attribute__((packed));
+} __packed;
 
 /*****************************************************************************/
 /*
diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h
index 0d9e506f5d5a..70672544db86 100644
--- a/net/bluetooth/bnep/bnep.h
+++ b/net/bluetooth/bnep/bnep.h
@@ -86,26 +86,26 @@ struct bnep_setup_conn_req {
 	__u8  ctrl;
 	__u8  uuid_size;
 	__u8  service[0];
-} __attribute__((packed));
+} __packed;
 
 struct bnep_set_filter_req {
 	__u8  type;
 	__u8  ctrl;
 	__be16 len;
 	__u8  list[0];
-} __attribute__((packed));
+} __packed;
 
 struct bnep_control_rsp {
 	__u8  type;
 	__u8  ctrl;
 	__be16 resp;
-} __attribute__((packed));
+} __packed;
 
 struct bnep_ext_hdr {
 	__u8  type;
 	__u8  len;
 	__u8  data[0];
-} __attribute__((packed));
+} __packed;
 
 /* BNEP ioctl defines */
 #define BNEPCONNADD	_IOW('B', 200, int)
diff --git a/net/compat.c b/net/compat.c
index ec24d9edb025..1cf75905f132 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -531,7 +531,7 @@ struct compat_group_req {
 	__u32				 gr_interface;
 	struct __kernel_sockaddr_storage gr_group
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 struct compat_group_source_req {
 	__u32				 gsr_interface;
@@ -539,7 +539,7 @@ struct compat_group_source_req {
 		__attribute__ ((aligned(4)));
 	struct __kernel_sockaddr_storage gsr_source
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 struct compat_group_filter {
 	__u32				 gf_interface;
@@ -549,7 +549,7 @@ struct compat_group_filter {
 	__u32				 gf_numsrc;
 	struct __kernel_sockaddr_storage gf_slist[1]
 		__attribute__ ((aligned(4)));
-} __attribute__ ((packed));
+} __packed;
 
 #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
 			sizeof(struct __kernel_sockaddr_storage))
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index f28ad2cc8428..499c045d6910 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -1463,7 +1463,7 @@ struct iucv_path_pending {
 	u32 res3;
 	u8  ippollfg;
 	u8  res4[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_pending(struct iucv_irq_data *data)
 {
@@ -1524,7 +1524,7 @@ struct iucv_path_complete {
 	u32 res3;
 	u8  ippollfg;
 	u8  res4[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_complete(struct iucv_irq_data *data)
 {
@@ -1554,7 +1554,7 @@ struct iucv_path_severed {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_severed(struct iucv_irq_data *data)
 {
@@ -1590,7 +1590,7 @@ struct iucv_path_quiesced {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_quiesced(struct iucv_irq_data *data)
 {
@@ -1618,7 +1618,7 @@ struct iucv_path_resumed {
 	u32 res4;
 	u8  ippollfg;
 	u8  res5[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_path_resumed(struct iucv_irq_data *data)
 {
@@ -1649,7 +1649,7 @@ struct iucv_message_complete {
 	u32 ipbfln2f;
 	u8  ippollfg;
 	u8  res2[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_message_complete(struct iucv_irq_data *data)
 {
@@ -1694,7 +1694,7 @@ struct iucv_message_pending {
 	u32 ipbfln2f;
 	u8  ippollfg;
 	u8  res2[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void iucv_message_pending(struct iucv_irq_data *data)
 {
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c7000a6ca379..a2ed0f7b5568 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -600,7 +600,7 @@ struct iapp_layer2_update {
 	u8 ssap;		/* 0 */
 	u8 control;
 	u8 xid_info[3];
-} __attribute__ ((packed));
+} __packed;
 
 static void ieee80211_send_layer2_update(struct sta_info *sta)
 {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1a9e2da37a93..ec3e5c3e27bd 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1084,7 +1084,7 @@ struct ieee80211_tx_status_rtap_hdr {
 	u8 padding_for_rate;
 	__le16 tx_flags;
 	u8 data_retries;
-} __attribute__ ((packed));
+} __packed;
 
 
 /* HT */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 6e2a7bcd8cb8..2d9a2ee94e17 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2139,7 +2139,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
 		u8 rate_or_pad;
 		__le16 chan_freq;
 		__le16 chan_flags;
-	} __attribute__ ((packed)) *rthdr;
+	} __packed *rthdr;
 	struct sk_buff *skb = rx->skb, *skb2;
 	struct net_device *prev_dev = NULL;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index bd2a50b482ac..246f92924658 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1817,7 +1817,7 @@ malformed:
 struct __sctp_missing {
 	__be32 num_missing;
 	__be16 type;
-}  __attribute__((packed));
+}  __packed;
 
 /*
  * Report a missing mandatory parameter.
-- 
cgit v1.2.3-58-ga151


From db2c24175d149b55784f7cb2c303622ce962c1ae Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Wed, 2 Jun 2010 04:55:02 +0000
Subject: act_pedit: access skb->data safely

access skb->data safely

we should use skb_header_pointer() and skb_store_bits() to access skb->data to
handle small or non-linear skbs.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/sched/act_pedit.c |   24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_pedit.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index fdbd0b7bd840..50e3d945e1f4 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -125,7 +125,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 {
 	struct tcf_pedit *p = a->priv;
 	int i, munged = 0;
-	u8 *pptr;
+	unsigned int off;
 
 	if (!(skb->tc_verd & TC_OK2MUNGE)) {
 		/* should we set skb->cloned? */
@@ -134,7 +134,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 		}
 	}
 
-	pptr = skb_network_header(skb);
+	off = skb_network_offset(skb);
 
 	spin_lock(&p->tcf_lock);
 
@@ -144,17 +144,17 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 		struct tc_pedit_key *tkey = p->tcfp_keys;
 
 		for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
-			u32 *ptr;
+			u32 *ptr, _data;
 			int offset = tkey->off;
 
 			if (tkey->offmask) {
-				if (skb->len > tkey->at) {
-					 char *j = pptr + tkey->at;
-					 offset += ((*j & tkey->offmask) >>
-						   tkey->shift);
-				} else {
+				char *d, _d;
+
+				d = skb_header_pointer(skb, off + tkey->at, 1,
+						       &_d);
+				if (!d)
 					goto bad;
-				}
+				offset += (*d & tkey->offmask) >> tkey->shift;
 			}
 
 			if (offset % 4) {
@@ -169,9 +169,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 				goto bad;
 			}
 
-			ptr = (u32 *)(pptr+offset);
+			ptr = skb_header_pointer(skb, off + offset, 4, &_data);
+			if (!ptr)
+				goto bad;
 			/* just do it, baby */
 			*ptr = ((*ptr & tkey->mask) ^ tkey->val);
+			if (ptr == &_data)
+				skb_store_bits(skb, off + offset, ptr, 4);
 			munged++;
 		}
 
-- 
cgit v1.2.3-58-ga151


From 4736022844fe694c4ee971fa2b6c1cb38dadbc78 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 04:13:21 +0000
Subject: ipv4: RCU changes in __mkroute_input()

Avoid two atomic ops on output device refcount

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 1cfe0d199e7c..7b8eacd5ac26 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1957,22 +1957,22 @@ static void ip_handle_martian_source(struct net_device *dev,
 #endif
 }
 
+/* called in rcu_read_lock() section */
 static int __mkroute_input(struct sk_buff *skb,
 			   struct fib_result *res,
 			   struct in_device *in_dev,
 			   __be32 daddr, __be32 saddr, u32 tos,
 			   struct rtable **result)
 {
-
 	struct rtable *rth;
 	int err;
 	struct in_device *out_dev;
-	unsigned flags = 0;
+	unsigned int flags = 0;
 	__be32 spec_dst;
 	u32 itag;
 
 	/* get a working reference to the output device */
-	out_dev = in_dev_get(FIB_RES_DEV(*res));
+	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
 	if (out_dev == NULL) {
 		if (net_ratelimit())
 			printk(KERN_CRIT "Bug in ip_route_input" \
@@ -2053,8 +2053,6 @@ static int __mkroute_input(struct sk_buff *skb,
 	*result = rth;
 	err = 0;
  cleanup:
-	/* release the working reference to the output device */
-	in_dev_put(out_dev);
 	return err;
 }
 
-- 
cgit v1.2.3-58-ga151


From faa9dcf793beba05f7178b63a59eaa3ca5175b6a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 04:09:10 +0000
Subject: arp: RCU changes

Avoid two atomic ops in arp_fwd_proxy()

Avoid two atomic ops in arp_process()

Valid optims since arp_rcv() is run under rcu_read_lock()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index f094b75810db..917d2d66162e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -545,10 +545,10 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 
 	/* place to check for proxy_arp for routes */
 
-	if ((out_dev = in_dev_get(rt->u.dst.dev)) != NULL) {
+	out_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	if (out_dev)
 		omi = IN_DEV_MEDIUM_ID(out_dev);
-		in_dev_put(out_dev);
-	}
+
 	return (omi != imi && omi != -1);
 }
 
@@ -741,7 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 static int arp_process(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct arphdr *arp;
 	unsigned char *arp_ptr;
 	struct rtable *rt;
@@ -890,7 +890,6 @@ static int arp_process(struct sk_buff *skb)
 					arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
 				} else {
 					pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
-					in_dev_put(in_dev);
 					return 0;
 				}
 				goto out;
@@ -936,8 +935,6 @@ static int arp_process(struct sk_buff *skb)
 	}
 
 out:
-	if (in_dev)
-		in_dev_put(in_dev);
 	consume_skb(skb);
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From c6d409cfd0fd41e7a0847875e4338ad648c9b96b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 20:03:40 -0700
Subject: From abbffa2aa9bd6f8df16d0d0a102af677510d8b9a Mon Sep 17 00:00:00
 2001 From: Eric Dumazet <eric.dumazet@gmail.com> Date: Thu, 3 Jun 2010
 04:29:41 +0000 Subject: [PATCH 2/3] net: net/socket.c and net/compat.c
 cleanups

cleanup patch, to match modern coding style.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c |   47 ++++++++---------
 net/socket.c |  165 ++++++++++++++++++++++++++++------------------------------
 2 files changed, 102 insertions(+), 110 deletions(-)

diff --git a/net/compat.c b/net/compat.c
index 1cf7590..63d260e 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 	int tot_len;

 	if (kern_msg->msg_namelen) {
-		if (mode==VERIFY_READ) {
+		if (mode == VERIFY_READ) {
 			int err = move_addr_to_kernel(kern_msg->msg_name,
 						      kern_msg->msg_namelen,
 						      kern_address);
@@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname,
 static int do_set_sock_timeout(struct socket *sock, int level,
 		int optname, char __user *optval, unsigned int optlen)
 {
-	struct compat_timeval __user *up = (struct compat_timeval __user *) optval;
+	struct compat_timeval __user *up = (struct compat_timeval __user *)optval;
 	struct timeval ktime;
 	mm_segment_t old_fs;
 	int err;
@@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level,
 		return -EFAULT;
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime));
+	err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime));
 	set_fs(old_fs);

 	return err;
@@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 				char __user *optval, unsigned int optlen)
 {
 	int err;
-	struct socket *sock;
+	struct socket *sock = sockfd_lookup(fd, &err);

-	if ((sock = sockfd_lookup(fd, &err))!=NULL)
-	{
-		err = security_socket_setsockopt(sock,level,optname);
+	if (sock) {
+		err = security_socket_setsockopt(sock, level, optname);
 		if (err) {
 			sockfd_put(sock);
 			return err;
@@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
 int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct compat_timeval __user *ctv =
-			(struct compat_timeval __user*) userstamp;
+			(struct compat_timeval __user *) userstamp;
 	int err = -ENOENT;
 	struct timeval tv;

@@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp);
 int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
 {
 	struct compat_timespec __user *ctv =
-			(struct compat_timespec __user*) userstamp;
+			(struct compat_timespec __user *) userstamp;
 	int err = -ENOENT;
 	struct timespec ts;

@@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
 	int err;
-	struct socket *sock;
+	struct socket *sock = sockfd_lookup(fd, &err);

-	if ((sock = sockfd_lookup(fd, &err))!=NULL)
-	{
-		err = security_socket_getsockopt(sock, level,
-							   optname);
+	if (sock) {
+		err = security_socket_getsockopt(sock, level, optname);
 		if (err) {
 			sockfd_put(sock);
 			return err;
@@ -557,7 +554,7 @@ struct compat_group_filter {

 int compat_mc_setsockopt(struct sock *sock, int level, int optname,
 	char __user *optval, unsigned int optlen,
-	int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int))
+	int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
 {
 	char __user	*koptval = optval;
 	int		koptlen = optlen;
@@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname,
 	}
 	return setsockopt(sock, level, optname, koptval, koptlen);
 }
-
 EXPORT_SYMBOL(compat_mc_setsockopt);

 int compat_mc_getsockopt(struct sock *sock, int level, int optname,
 	char __user *optval, int __user *optlen,
-	int (*getsockopt)(struct sock *,int,int,char __user *,int __user *))
+	int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
 {
 	struct compat_group_filter __user *gf32 = (void *)optval;
 	struct group_filter __user *kgf;
@@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
 	    __put_user(interface, &kgf->gf_interface) ||
 	    __put_user(fmode, &kgf->gf_fmode) ||
 	    __put_user(numsrc, &kgf->gf_numsrc) ||
-	    copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group)))
+	    copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)))
 		return -EFAULT;

 	err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
@@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
 		copylen = numsrc * sizeof(gf32->gf_slist[0]);
 		if (copylen > klen)
 			copylen = klen;
-	        if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
+		if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
 			return -EFAULT;
 	}
 	return err;
 }
-
 EXPORT_SYMBOL(compat_mc_getsockopt);

 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-				AL(4),AL(5)};
+static unsigned char nas[20] = {
+	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
+	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
+	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
+	AL(4), AL(5)
+};
 #undef AL

 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 					  compat_ptr(a[4]), compat_ptr(a[5]));
 		break;
 	case SYS_SHUTDOWN:
-		ret = sys_shutdown(a0,a1);
+		ret = sys_shutdown(a0, a1);
 		break;
 	case SYS_SETSOCKOPT:
 		ret = compat_sys_setsockopt(a0, a1, a[2],
diff --git a/net/socket.c b/net/socket.c
index 367d547..b63c051 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
-			        struct pipe_inode_info *pipe, size_t len,
+				struct pipe_inode_info *pipe, size_t len,
 				unsigned int flags);

 /*
@@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly;
  *	Statistics counters of the socket lists
  */

-static DEFINE_PER_CPU(int, sockets_in_use) = 0;
+static DEFINE_PER_CPU(int, sockets_in_use);

 /*
  * Support routines.
@@ -309,9 +309,9 @@ static int init_inodecache(void)
 }

 static const struct super_operations sockfs_ops = {
-	.alloc_inode =	sock_alloc_inode,
-	.destroy_inode =sock_destroy_inode,
-	.statfs =	simple_statfs,
+	.alloc_inode	= sock_alloc_inode,
+	.destroy_inode	= sock_destroy_inode,
+	.statfs		= simple_statfs,
 };

 static int sockfs_get_sb(struct file_system_type *fs_type,
@@ -411,6 +411,7 @@ int sock_map_fd(struct socket *sock, int flags)

 	return fd;
 }
+EXPORT_SYMBOL(sock_map_fd);

 static struct socket *sock_from_file(struct file *file, int *err)
 {
@@ -422,7 +423,7 @@ static struct socket *sock_from_file(struct file *file, int *err)
 }

 /**
- *	sockfd_lookup	- 	Go from a file number to its socket slot
+ *	sockfd_lookup - Go from a file number to its socket slot
  *	@fd: file handle
  *	@err: pointer to an error code return
  *
@@ -450,6 +451,7 @@ struct socket *sockfd_lookup(int fd, int *err)
 		fput(file);
 	return sock;
 }
+EXPORT_SYMBOL(sockfd_lookup);

 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 {
@@ -540,6 +542,7 @@ void sock_release(struct socket *sock)
 	}
 	sock->file = NULL;
 }
+EXPORT_SYMBOL(sock_release);

 int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
 		      union skb_shared_tx *shtx)
@@ -586,6 +589,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		ret = wait_on_sync_kiocb(&iocb);
 	return ret;
 }
+EXPORT_SYMBOL(sock_sendmsg);

 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
@@ -604,6 +608,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	set_fs(oldfs);
 	return result;
 }
+EXPORT_SYMBOL(kernel_sendmsg);

 static int ktime2ts(ktime_t kt, struct timespec *ts)
 {
@@ -664,7 +669,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		put_cmsg(msg, SOL_SOCKET,
 			 SCM_TIMESTAMPING, sizeof(ts), &ts);
 }
-
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);

 inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
@@ -720,6 +724,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 		ret = wait_on_sync_kiocb(&iocb);
 	return ret;
 }
+EXPORT_SYMBOL(sock_recvmsg);

 static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 			      size_t size, int flags)
@@ -752,6 +757,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 	set_fs(oldfs);
 	return result;
 }
+EXPORT_SYMBOL(kernel_recvmsg);

 static void sock_aio_dtor(struct kiocb *iocb)
 {
@@ -774,7 +780,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
 }

 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
-			        struct pipe_inode_info *pipe, size_t len,
+				struct pipe_inode_info *pipe, size_t len,
 				unsigned int flags)
 {
 	struct socket *sock = file->private_data;
@@ -887,7 +893,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
  */

 static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
+static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);

 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 {
@@ -895,7 +901,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 	br_ioctl_hook = hook;
 	mutex_unlock(&br_ioctl_mutex);
 }
-
 EXPORT_SYMBOL(brioctl_set);

 static DEFINE_MUTEX(vlan_ioctl_mutex);
@@ -907,7 +912,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
 	vlan_ioctl_hook = hook;
 	mutex_unlock(&vlan_ioctl_mutex);
 }
-
 EXPORT_SYMBOL(vlan_ioctl_set);

 static DEFINE_MUTEX(dlci_ioctl_mutex);
@@ -919,7 +923,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 	dlci_ioctl_hook = hook;
 	mutex_unlock(&dlci_ioctl_mutex);
 }
-
 EXPORT_SYMBOL(dlci_ioctl_set);

 static long sock_do_ioctl(struct net *net, struct socket *sock,
@@ -1047,6 +1050,7 @@ out_release:
 	sock = NULL;
 	goto out;
 }
+EXPORT_SYMBOL(sock_create_lite);

 /* No kernel lock held - perfect */
 static unsigned int sock_poll(struct file *file, poll_table *wait)
@@ -1147,6 +1151,7 @@ call_kill:
 	rcu_read_unlock();
 	return 0;
 }
+EXPORT_SYMBOL(sock_wake_async);

 static int __sock_create(struct net *net, int family, int type, int protocol,
 			 struct socket **res, int kern)
@@ -1265,11 +1270,13 @@ int sock_create(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
 }
+EXPORT_SYMBOL(sock_create);

 int sock_create_kern(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(&init_net, family, type, protocol, res, 1);
 }
+EXPORT_SYMBOL(sock_create_kern);

 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 {
@@ -1474,7 +1481,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 		goto out;

 	err = -ENFILE;
-	if (!(newsock = sock_alloc()))
+	newsock = sock_alloc();
+	if (!newsock)
 		goto out_put;

 	newsock->type = sock->type;
@@ -1861,8 +1869,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	}
-	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;

 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -1964,8 +1971,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(msg_sys, msg_compat))
 			return -EFAULT;
-	}
-	else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
+	} else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;

 	err = -EMSGSIZE;
@@ -2191,10 +2197,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
 static const unsigned char nargs[20] = {
-	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-	AL(4),AL(5)
+	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
+	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
+	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
+	AL(4), AL(5)
 };

 #undef AL
@@ -2340,6 +2346,7 @@ int sock_register(const struct net_proto_family *ops)
 	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
+EXPORT_SYMBOL(sock_register);

 /**
  *	sock_unregister - remove a protocol handler
@@ -2366,6 +2373,7 @@ void sock_unregister(int family)

 	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
 }
+EXPORT_SYMBOL(sock_unregister);

 static int __init sock_init(void)
 {
@@ -2490,13 +2498,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 		ifc.ifc_req = NULL;
 		uifc = compat_alloc_user_space(sizeof(struct ifconf));
 	} else {
-		size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) *
-			sizeof (struct ifreq);
+		size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
+			sizeof(struct ifreq);
 		uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
 		ifc.ifc_len = len;
 		ifr = ifc.ifc_req = (void __user *)(uifc + 1);
 		ifr32 = compat_ptr(ifc32.ifcbuf);
-		for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) {
+		for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
 			if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
 				return -EFAULT;
 			ifr++;
@@ -2516,9 +2524,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 	ifr = ifc.ifc_req;
 	ifr32 = compat_ptr(ifc32.ifcbuf);
 	for (i = 0, j = 0;
-             i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
-	     i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) {
-		if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq)))
+	     i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
+	     i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
+		if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
 			return -EFAULT;
 		ifr32++;
 		ifr++;
@@ -2567,7 +2575,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
 	compat_uptr_t uptr32;
 	struct ifreq __user *uifr;

-	uifr = compat_alloc_user_space(sizeof (*uifr));
+	uifr = compat_alloc_user_space(sizeof(*uifr));
 	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
 		return -EFAULT;

@@ -2601,9 +2609,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
 			return -EFAULT;

 		old_fs = get_fs();
-		set_fs (KERNEL_DS);
+		set_fs(KERNEL_DS);
 		err = dev_ioctl(net, cmd, &kifr);
-		set_fs (old_fs);
+		set_fs(old_fs);

 		return err;
 	case SIOCBONDSLAVEINFOQUERY:
@@ -2710,9 +2718,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 		return -EFAULT;

 	old_fs = get_fs();
-	set_fs (KERNEL_DS);
+	set_fs(KERNEL_DS);
 	err = dev_ioctl(net, cmd, (void __user *)&ifr);
-	set_fs (old_fs);
+	set_fs(old_fs);

 	if (cmd == SIOCGIFMAP && !err) {
 		err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -2734,7 +2742,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
 	compat_uptr_t uptr32;
 	struct ifreq __user *uifr;

-	uifr = compat_alloc_user_space(sizeof (*uifr));
+	uifr = compat_alloc_user_space(sizeof(*uifr));
 	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
 		return -EFAULT;

@@ -2750,20 +2758,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
 }

 struct rtentry32 {
-	u32   		rt_pad1;
+	u32		rt_pad1;
 	struct sockaddr rt_dst;         /* target address               */
 	struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
 	struct sockaddr rt_genmask;     /* target network mask (IP)     */
-	unsigned short  rt_flags;
-	short           rt_pad2;
-	u32   		rt_pad3;
-	unsigned char   rt_tos;
-	unsigned char   rt_class;
-	short           rt_pad4;
-	short           rt_metric;      /* +1 for binary compatibility! */
+	unsigned short	rt_flags;
+	short		rt_pad2;
+	u32		rt_pad3;
+	unsigned char	rt_tos;
+	unsigned char	rt_class;
+	short		rt_pad4;
+	short		rt_metric;      /* +1 for binary compatibility! */
 	/* char * */ u32 rt_dev;        /* forcing the device at add    */
-	u32   		rt_mtu;         /* per route MTU/Window         */
-	u32   		rt_window;      /* Window clamping              */
+	u32		rt_mtu;         /* per route MTU/Window         */
+	u32		rt_window;      /* Window clamping              */
 	unsigned short  rt_irtt;        /* Initial RTT                  */
 };

@@ -2793,29 +2801,29 @@ static int routing_ioctl(struct net *net, struct socket *sock,

 	if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
 		struct in6_rtmsg32 __user *ur6 = argp;
-		ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
+		ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
 			3 * sizeof(struct in6_addr));
-		ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
-		ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
-		ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
-		ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
-		ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
-		ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
-		ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
+		ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
+		ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
+		ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
+		ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
+		ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
+		ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
+		ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));

 		r = (void *) &r6;
 	} else { /* ipv4 */
 		struct rtentry32 __user *ur4 = argp;
-		ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
+		ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
 					3 * sizeof(struct sockaddr));
-		ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
-		ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
-		ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
-		ret |= __get_user (r4.rt_window, &(ur4->rt_window));
-		ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
-		ret |= __get_user (rtdev, &(ur4->rt_dev));
+		ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
+		ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
+		ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
+		ret |= __get_user(r4.rt_window, &(ur4->rt_window));
+		ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
+		ret |= __get_user(rtdev, &(ur4->rt_dev));
 		if (rtdev) {
-			ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
+			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
 			r4.rt_dev = devname; devname[15] = 0;
 		} else
 			r4.rt_dev = NULL;
@@ -2828,9 +2836,9 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 		goto out;
 	}

-	set_fs (KERNEL_DS);
+	set_fs(KERNEL_DS);
 	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
-	set_fs (old_fs);
+	set_fs(old_fs);

 out:
 	return ret;
@@ -2993,11 +3001,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
 {
 	return sock->ops->bind(sock, addr, addrlen);
 }
+EXPORT_SYMBOL(kernel_bind);

 int kernel_listen(struct socket *sock, int backlog)
 {
 	return sock->ops->listen(sock, backlog);
 }
+EXPORT_SYMBOL(kernel_listen);

 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 {
@@ -3022,24 +3032,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 done:
 	return err;
 }
+EXPORT_SYMBOL(kernel_accept);

 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags)
 {
 	return sock->ops->connect(sock, addr, addrlen, flags);
 }
+EXPORT_SYMBOL(kernel_connect);

 int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
 			 int *addrlen)
 {
 	return sock->ops->getname(sock, addr, addrlen, 0);
 }
+EXPORT_SYMBOL(kernel_getsockname);

 int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
 			 int *addrlen)
 {
 	return sock->ops->getname(sock, addr, addrlen, 1);
 }
+EXPORT_SYMBOL(kernel_getpeername);

 int kernel_getsockopt(struct socket *sock, int level, int optname,
 			char *optval, int *optlen)
@@ -3056,6 +3070,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
 	set_fs(oldfs);
 	return err;
 }
+EXPORT_SYMBOL(kernel_getsockopt);

 int kernel_setsockopt(struct socket *sock, int level, int optname,
 			char *optval, unsigned int optlen)
@@ -3072,6 +3087,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
 	set_fs(oldfs);
 	return err;
 }
+EXPORT_SYMBOL(kernel_setsockopt);

 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
@@ -3083,6 +3099,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,

 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(kernel_sendpage);

 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
 {
@@ -3095,33 +3112,11 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)

 	return err;
 }
+EXPORT_SYMBOL(kernel_sock_ioctl);

 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
 {
 	return sock->ops->shutdown(sock, how);
 }
-
-EXPORT_SYMBOL(sock_create);
-EXPORT_SYMBOL(sock_create_kern);
-EXPORT_SYMBOL(sock_create_lite);
-EXPORT_SYMBOL(sock_map_fd);
-EXPORT_SYMBOL(sock_recvmsg);
-EXPORT_SYMBOL(sock_register);
-EXPORT_SYMBOL(sock_release);
-EXPORT_SYMBOL(sock_sendmsg);
-EXPORT_SYMBOL(sock_unregister);
-EXPORT_SYMBOL(sock_wake_async);
-EXPORT_SYMBOL(sockfd_lookup);
-EXPORT_SYMBOL(kernel_sendmsg);
-EXPORT_SYMBOL(kernel_recvmsg);
-EXPORT_SYMBOL(kernel_bind);
-EXPORT_SYMBOL(kernel_listen);
-EXPORT_SYMBOL(kernel_accept);
-EXPORT_SYMBOL(kernel_connect);
-EXPORT_SYMBOL(kernel_getsockname);
-EXPORT_SYMBOL(kernel_getpeername);
-EXPORT_SYMBOL(kernel_getsockopt);
-EXPORT_SYMBOL(kernel_setsockopt);
-EXPORT_SYMBOL(kernel_sendpage);
-EXPORT_SYMBOL(kernel_sock_ioctl);
 EXPORT_SYMBOL(kernel_sock_shutdown);
+
--
1.7.0.4
---
 net/compat.c |  47 ++++++++---------
 net/socket.c | 164 ++++++++++++++++++++++++++++-------------------------------
 2 files changed, 101 insertions(+), 110 deletions(-)

(limited to 'net')

diff --git a/net/compat.c b/net/compat.c
index 1cf75905f132..63d260e81472 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -81,7 +81,7 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
 	int tot_len;
 
 	if (kern_msg->msg_namelen) {
-		if (mode==VERIFY_READ) {
+		if (mode == VERIFY_READ) {
 			int err = move_addr_to_kernel(kern_msg->msg_name,
 						      kern_msg->msg_namelen,
 						      kern_address);
@@ -354,7 +354,7 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname,
 static int do_set_sock_timeout(struct socket *sock, int level,
 		int optname, char __user *optval, unsigned int optlen)
 {
-	struct compat_timeval __user *up = (struct compat_timeval __user *) optval;
+	struct compat_timeval __user *up = (struct compat_timeval __user *)optval;
 	struct timeval ktime;
 	mm_segment_t old_fs;
 	int err;
@@ -367,7 +367,7 @@ static int do_set_sock_timeout(struct socket *sock, int level,
 		return -EFAULT;
 	old_fs = get_fs();
 	set_fs(KERNEL_DS);
-	err = sock_setsockopt(sock, level, optname, (char *) &ktime, sizeof(ktime));
+	err = sock_setsockopt(sock, level, optname, (char *)&ktime, sizeof(ktime));
 	set_fs(old_fs);
 
 	return err;
@@ -389,11 +389,10 @@ asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
 				char __user *optval, unsigned int optlen)
 {
 	int err;
-	struct socket *sock;
+	struct socket *sock = sockfd_lookup(fd, &err);
 
-	if ((sock = sockfd_lookup(fd, &err))!=NULL)
-	{
-		err = security_socket_setsockopt(sock,level,optname);
+	if (sock) {
+		err = security_socket_setsockopt(sock, level, optname);
 		if (err) {
 			sockfd_put(sock);
 			return err;
@@ -453,7 +452,7 @@ static int compat_sock_getsockopt(struct socket *sock, int level, int optname,
 int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
 	struct compat_timeval __user *ctv =
-			(struct compat_timeval __user*) userstamp;
+			(struct compat_timeval __user *) userstamp;
 	int err = -ENOENT;
 	struct timeval tv;
 
@@ -477,7 +476,7 @@ EXPORT_SYMBOL(compat_sock_get_timestamp);
 int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
 {
 	struct compat_timespec __user *ctv =
-			(struct compat_timespec __user*) userstamp;
+			(struct compat_timespec __user *) userstamp;
 	int err = -ENOENT;
 	struct timespec ts;
 
@@ -502,12 +501,10 @@ asmlinkage long compat_sys_getsockopt(int fd, int level, int optname,
 				char __user *optval, int __user *optlen)
 {
 	int err;
-	struct socket *sock;
+	struct socket *sock = sockfd_lookup(fd, &err);
 
-	if ((sock = sockfd_lookup(fd, &err))!=NULL)
-	{
-		err = security_socket_getsockopt(sock, level,
-							   optname);
+	if (sock) {
+		err = security_socket_getsockopt(sock, level, optname);
 		if (err) {
 			sockfd_put(sock);
 			return err;
@@ -557,7 +554,7 @@ struct compat_group_filter {
 
 int compat_mc_setsockopt(struct sock *sock, int level, int optname,
 	char __user *optval, unsigned int optlen,
-	int (*setsockopt)(struct sock *,int,int,char __user *,unsigned int))
+	int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
 {
 	char __user	*koptval = optval;
 	int		koptlen = optlen;
@@ -640,12 +637,11 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname,
 	}
 	return setsockopt(sock, level, optname, koptval, koptlen);
 }
-
 EXPORT_SYMBOL(compat_mc_setsockopt);
 
 int compat_mc_getsockopt(struct sock *sock, int level, int optname,
 	char __user *optval, int __user *optlen,
-	int (*getsockopt)(struct sock *,int,int,char __user *,int __user *))
+	int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
 {
 	struct compat_group_filter __user *gf32 = (void *)optval;
 	struct group_filter __user *kgf;
@@ -681,7 +677,7 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
 	    __put_user(interface, &kgf->gf_interface) ||
 	    __put_user(fmode, &kgf->gf_fmode) ||
 	    __put_user(numsrc, &kgf->gf_numsrc) ||
-	    copy_in_user(&kgf->gf_group,&gf32->gf_group,sizeof(kgf->gf_group)))
+	    copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)))
 		return -EFAULT;
 
 	err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
@@ -714,21 +710,22 @@ int compat_mc_getsockopt(struct sock *sock, int level, int optname,
 		copylen = numsrc * sizeof(gf32->gf_slist[0]);
 		if (copylen > klen)
 			copylen = klen;
-	        if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
+		if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
 			return -EFAULT;
 	}
 	return err;
 }
-
 EXPORT_SYMBOL(compat_mc_getsockopt);
 
 
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-				AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-				AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-				AL(4),AL(5)};
+static unsigned char nas[20] = {
+	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
+	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
+	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
+	AL(4), AL(5)
+};
 #undef AL
 
 asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -827,7 +824,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
 					  compat_ptr(a[4]), compat_ptr(a[5]));
 		break;
 	case SYS_SHUTDOWN:
-		ret = sys_shutdown(a0,a1);
+		ret = sys_shutdown(a0, a1);
 		break;
 	case SYS_SETSOCKOPT:
 		ret = compat_sys_setsockopt(a0, a1, a[2],
diff --git a/net/socket.c b/net/socket.c
index 367d5477d00f..acfa1738663d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -124,7 +124,7 @@ static int sock_fasync(int fd, struct file *filp, int on);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
 			     int offset, size_t size, loff_t *ppos, int more);
 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
-			        struct pipe_inode_info *pipe, size_t len,
+				struct pipe_inode_info *pipe, size_t len,
 				unsigned int flags);
 
 /*
@@ -162,7 +162,7 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly;
  *	Statistics counters of the socket lists
  */
 
-static DEFINE_PER_CPU(int, sockets_in_use) = 0;
+static DEFINE_PER_CPU(int, sockets_in_use);
 
 /*
  * Support routines.
@@ -309,9 +309,9 @@ static int init_inodecache(void)
 }
 
 static const struct super_operations sockfs_ops = {
-	.alloc_inode =	sock_alloc_inode,
-	.destroy_inode =sock_destroy_inode,
-	.statfs =	simple_statfs,
+	.alloc_inode	= sock_alloc_inode,
+	.destroy_inode	= sock_destroy_inode,
+	.statfs		= simple_statfs,
 };
 
 static int sockfs_get_sb(struct file_system_type *fs_type,
@@ -411,6 +411,7 @@ int sock_map_fd(struct socket *sock, int flags)
 
 	return fd;
 }
+EXPORT_SYMBOL(sock_map_fd);
 
 static struct socket *sock_from_file(struct file *file, int *err)
 {
@@ -422,7 +423,7 @@ static struct socket *sock_from_file(struct file *file, int *err)
 }
 
 /**
- *	sockfd_lookup	- 	Go from a file number to its socket slot
+ *	sockfd_lookup - Go from a file number to its socket slot
  *	@fd: file handle
  *	@err: pointer to an error code return
  *
@@ -450,6 +451,7 @@ struct socket *sockfd_lookup(int fd, int *err)
 		fput(file);
 	return sock;
 }
+EXPORT_SYMBOL(sockfd_lookup);
 
 static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
 {
@@ -540,6 +542,7 @@ void sock_release(struct socket *sock)
 	}
 	sock->file = NULL;
 }
+EXPORT_SYMBOL(sock_release);
 
 int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
 		      union skb_shared_tx *shtx)
@@ -586,6 +589,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		ret = wait_on_sync_kiocb(&iocb);
 	return ret;
 }
+EXPORT_SYMBOL(sock_sendmsg);
 
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
@@ -604,6 +608,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	set_fs(oldfs);
 	return result;
 }
+EXPORT_SYMBOL(kernel_sendmsg);
 
 static int ktime2ts(ktime_t kt, struct timespec *ts)
 {
@@ -664,7 +669,6 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		put_cmsg(msg, SOL_SOCKET,
 			 SCM_TIMESTAMPING, sizeof(ts), &ts);
 }
-
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
 inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
@@ -720,6 +724,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
 		ret = wait_on_sync_kiocb(&iocb);
 	return ret;
 }
+EXPORT_SYMBOL(sock_recvmsg);
 
 static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 			      size_t size, int flags)
@@ -752,6 +757,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 	set_fs(oldfs);
 	return result;
 }
+EXPORT_SYMBOL(kernel_recvmsg);
 
 static void sock_aio_dtor(struct kiocb *iocb)
 {
@@ -774,7 +780,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
 }
 
 static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
-			        struct pipe_inode_info *pipe, size_t len,
+				struct pipe_inode_info *pipe, size_t len,
 				unsigned int flags)
 {
 	struct socket *sock = file->private_data;
@@ -887,7 +893,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
  */
 
 static DEFINE_MUTEX(br_ioctl_mutex);
-static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
+static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
 
 void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 {
@@ -895,7 +901,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
 	br_ioctl_hook = hook;
 	mutex_unlock(&br_ioctl_mutex);
 }
-
 EXPORT_SYMBOL(brioctl_set);
 
 static DEFINE_MUTEX(vlan_ioctl_mutex);
@@ -907,7 +912,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
 	vlan_ioctl_hook = hook;
 	mutex_unlock(&vlan_ioctl_mutex);
 }
-
 EXPORT_SYMBOL(vlan_ioctl_set);
 
 static DEFINE_MUTEX(dlci_ioctl_mutex);
@@ -919,7 +923,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
 	dlci_ioctl_hook = hook;
 	mutex_unlock(&dlci_ioctl_mutex);
 }
-
 EXPORT_SYMBOL(dlci_ioctl_set);
 
 static long sock_do_ioctl(struct net *net, struct socket *sock,
@@ -1047,6 +1050,7 @@ out_release:
 	sock = NULL;
 	goto out;
 }
+EXPORT_SYMBOL(sock_create_lite);
 
 /* No kernel lock held - perfect */
 static unsigned int sock_poll(struct file *file, poll_table *wait)
@@ -1147,6 +1151,7 @@ call_kill:
 	rcu_read_unlock();
 	return 0;
 }
+EXPORT_SYMBOL(sock_wake_async);
 
 static int __sock_create(struct net *net, int family, int type, int protocol,
 			 struct socket **res, int kern)
@@ -1265,11 +1270,13 @@ int sock_create(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
 }
+EXPORT_SYMBOL(sock_create);
 
 int sock_create_kern(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(&init_net, family, type, protocol, res, 1);
 }
+EXPORT_SYMBOL(sock_create_kern);
 
 SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
 {
@@ -1474,7 +1481,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 		goto out;
 
 	err = -ENFILE;
-	if (!(newsock = sock_alloc()))
+	newsock = sock_alloc();
+	if (!newsock)
 		goto out_put;
 
 	newsock->type = sock->type;
@@ -1861,8 +1869,7 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(&msg_sys, msg_compat))
 			return -EFAULT;
-	}
-	else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
+	} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -1964,8 +1971,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
 	if (MSG_CMSG_COMPAT & flags) {
 		if (get_compat_msghdr(msg_sys, msg_compat))
 			return -EFAULT;
-	}
-	else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
+	} else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
 		return -EFAULT;
 
 	err = -EMSGSIZE;
@@ -2191,10 +2197,10 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 /* Argument list sizes for sys_socketcall */
 #define AL(x) ((x) * sizeof(unsigned long))
 static const unsigned char nargs[20] = {
-	AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
-	AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
-	AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
-	AL(4),AL(5)
+	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
+	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
+	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
+	AL(4), AL(5)
 };
 
 #undef AL
@@ -2340,6 +2346,7 @@ int sock_register(const struct net_proto_family *ops)
 	printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
 	return err;
 }
+EXPORT_SYMBOL(sock_register);
 
 /**
  *	sock_unregister - remove a protocol handler
@@ -2366,6 +2373,7 @@ void sock_unregister(int family)
 
 	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
 }
+EXPORT_SYMBOL(sock_unregister);
 
 static int __init sock_init(void)
 {
@@ -2490,13 +2498,13 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 		ifc.ifc_req = NULL;
 		uifc = compat_alloc_user_space(sizeof(struct ifconf));
 	} else {
-		size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) *
-			sizeof (struct ifreq);
+		size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
+			sizeof(struct ifreq);
 		uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
 		ifc.ifc_len = len;
 		ifr = ifc.ifc_req = (void __user *)(uifc + 1);
 		ifr32 = compat_ptr(ifc32.ifcbuf);
-		for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) {
+		for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
 			if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
 				return -EFAULT;
 			ifr++;
@@ -2516,9 +2524,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
 	ifr = ifc.ifc_req;
 	ifr32 = compat_ptr(ifc32.ifcbuf);
 	for (i = 0, j = 0;
-             i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
-	     i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) {
-		if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq)))
+	     i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
+	     i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
+		if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
 			return -EFAULT;
 		ifr32++;
 		ifr++;
@@ -2567,7 +2575,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32
 	compat_uptr_t uptr32;
 	struct ifreq __user *uifr;
 
-	uifr = compat_alloc_user_space(sizeof (*uifr));
+	uifr = compat_alloc_user_space(sizeof(*uifr));
 	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
 		return -EFAULT;
 
@@ -2601,9 +2609,9 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
 			return -EFAULT;
 
 		old_fs = get_fs();
-		set_fs (KERNEL_DS);
+		set_fs(KERNEL_DS);
 		err = dev_ioctl(net, cmd, &kifr);
-		set_fs (old_fs);
+		set_fs(old_fs);
 
 		return err;
 	case SIOCBONDSLAVEINFOQUERY:
@@ -2710,9 +2718,9 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 		return -EFAULT;
 
 	old_fs = get_fs();
-	set_fs (KERNEL_DS);
+	set_fs(KERNEL_DS);
 	err = dev_ioctl(net, cmd, (void __user *)&ifr);
-	set_fs (old_fs);
+	set_fs(old_fs);
 
 	if (cmd == SIOCGIFMAP && !err) {
 		err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -2734,7 +2742,7 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
 	compat_uptr_t uptr32;
 	struct ifreq __user *uifr;
 
-	uifr = compat_alloc_user_space(sizeof (*uifr));
+	uifr = compat_alloc_user_space(sizeof(*uifr));
 	if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
 		return -EFAULT;
 
@@ -2750,20 +2758,20 @@ static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uif
 }
 
 struct rtentry32 {
-	u32   		rt_pad1;
+	u32		rt_pad1;
 	struct sockaddr rt_dst;         /* target address               */
 	struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
 	struct sockaddr rt_genmask;     /* target network mask (IP)     */
-	unsigned short  rt_flags;
-	short           rt_pad2;
-	u32   		rt_pad3;
-	unsigned char   rt_tos;
-	unsigned char   rt_class;
-	short           rt_pad4;
-	short           rt_metric;      /* +1 for binary compatibility! */
+	unsigned short	rt_flags;
+	short		rt_pad2;
+	u32		rt_pad3;
+	unsigned char	rt_tos;
+	unsigned char	rt_class;
+	short		rt_pad4;
+	short		rt_metric;      /* +1 for binary compatibility! */
 	/* char * */ u32 rt_dev;        /* forcing the device at add    */
-	u32   		rt_mtu;         /* per route MTU/Window         */
-	u32   		rt_window;      /* Window clamping              */
+	u32		rt_mtu;         /* per route MTU/Window         */
+	u32		rt_window;      /* Window clamping              */
 	unsigned short  rt_irtt;        /* Initial RTT                  */
 };
 
@@ -2793,29 +2801,29 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 
 	if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
 		struct in6_rtmsg32 __user *ur6 = argp;
-		ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst),
+		ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
 			3 * sizeof(struct in6_addr));
-		ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type));
-		ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
-		ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
-		ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric));
-		ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info));
-		ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags));
-		ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
+		ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
+		ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
+		ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
+		ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
+		ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
+		ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
+		ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
 
 		r = (void *) &r6;
 	} else { /* ipv4 */
 		struct rtentry32 __user *ur4 = argp;
-		ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst),
+		ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
 					3 * sizeof(struct sockaddr));
-		ret |= __get_user (r4.rt_flags, &(ur4->rt_flags));
-		ret |= __get_user (r4.rt_metric, &(ur4->rt_metric));
-		ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu));
-		ret |= __get_user (r4.rt_window, &(ur4->rt_window));
-		ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt));
-		ret |= __get_user (rtdev, &(ur4->rt_dev));
+		ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
+		ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
+		ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
+		ret |= __get_user(r4.rt_window, &(ur4->rt_window));
+		ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
+		ret |= __get_user(rtdev, &(ur4->rt_dev));
 		if (rtdev) {
-			ret |= copy_from_user (devname, compat_ptr(rtdev), 15);
+			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
 			r4.rt_dev = devname; devname[15] = 0;
 		} else
 			r4.rt_dev = NULL;
@@ -2828,9 +2836,9 @@ static int routing_ioctl(struct net *net, struct socket *sock,
 		goto out;
 	}
 
-	set_fs (KERNEL_DS);
+	set_fs(KERNEL_DS);
 	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
-	set_fs (old_fs);
+	set_fs(old_fs);
 
 out:
 	return ret;
@@ -2993,11 +3001,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
 {
 	return sock->ops->bind(sock, addr, addrlen);
 }
+EXPORT_SYMBOL(kernel_bind);
 
 int kernel_listen(struct socket *sock, int backlog)
 {
 	return sock->ops->listen(sock, backlog);
 }
+EXPORT_SYMBOL(kernel_listen);
 
 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 {
@@ -3022,24 +3032,28 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 done:
 	return err;
 }
+EXPORT_SYMBOL(kernel_accept);
 
 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags)
 {
 	return sock->ops->connect(sock, addr, addrlen, flags);
 }
+EXPORT_SYMBOL(kernel_connect);
 
 int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
 			 int *addrlen)
 {
 	return sock->ops->getname(sock, addr, addrlen, 0);
 }
+EXPORT_SYMBOL(kernel_getsockname);
 
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
 			 int *addrlen)
 {
 	return sock->ops->getname(sock, addr, addrlen, 1);
 }
+EXPORT_SYMBOL(kernel_getpeername);
 
 int kernel_getsockopt(struct socket *sock, int level, int optname,
 			char *optval, int *optlen)
@@ -3056,6 +3070,7 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
 	set_fs(oldfs);
 	return err;
 }
+EXPORT_SYMBOL(kernel_getsockopt);
 
 int kernel_setsockopt(struct socket *sock, int level, int optname,
 			char *optval, unsigned int optlen)
@@ -3072,6 +3087,7 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
 	set_fs(oldfs);
 	return err;
 }
+EXPORT_SYMBOL(kernel_setsockopt);
 
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
@@ -3083,6 +3099,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(kernel_sendpage);
 
 int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
 {
@@ -3095,33 +3112,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
 
 	return err;
 }
+EXPORT_SYMBOL(kernel_sock_ioctl);
 
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
 {
 	return sock->ops->shutdown(sock, how);
 }
-
-EXPORT_SYMBOL(sock_create);
-EXPORT_SYMBOL(sock_create_kern);
-EXPORT_SYMBOL(sock_create_lite);
-EXPORT_SYMBOL(sock_map_fd);
-EXPORT_SYMBOL(sock_recvmsg);
-EXPORT_SYMBOL(sock_register);
-EXPORT_SYMBOL(sock_release);
-EXPORT_SYMBOL(sock_sendmsg);
-EXPORT_SYMBOL(sock_unregister);
-EXPORT_SYMBOL(sock_wake_async);
-EXPORT_SYMBOL(sockfd_lookup);
-EXPORT_SYMBOL(kernel_sendmsg);
-EXPORT_SYMBOL(kernel_recvmsg);
-EXPORT_SYMBOL(kernel_bind);
-EXPORT_SYMBOL(kernel_listen);
-EXPORT_SYMBOL(kernel_accept);
-EXPORT_SYMBOL(kernel_connect);
-EXPORT_SYMBOL(kernel_getsockname);
-EXPORT_SYMBOL(kernel_getpeername);
-EXPORT_SYMBOL(kernel_getsockopt);
-EXPORT_SYMBOL(kernel_setsockopt);
-EXPORT_SYMBOL(kernel_sendpage);
-EXPORT_SYMBOL(kernel_sock_ioctl);
 EXPORT_SYMBOL(kernel_sock_shutdown);
-- 
cgit v1.2.3-58-ga151


From e12f8e29a8526172b7715881503bae636d60bdd8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 4 Jun 2010 13:31:29 +0200
Subject: netfilter: vmalloc_node cleanup

Using vmalloc_node(size, numa_node_id()) for temporary storage is not
needed. vmalloc(size) is more respectful of user NUMA policy.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arp_tables.c | 7 +++----
 net/ipv4/netfilter/ip_tables.c  | 4 ++--
 net/ipv6/netfilter/ip6_tables.c | 7 +++----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 1ac01b128621..16c0ba0a2728 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -758,7 +758,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	 * about).
 	 */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc_node(countersize, numa_node_id());
+	counters = vmalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1005,8 +1005,7 @@ static int __do_replace(struct net *net, const char *name,
 	struct arpt_entry *iter;
 
 	ret = 0;
-	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
-				numa_node_id());
+	counters = vmalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
@@ -1159,7 +1158,7 @@ static int do_add_counters(struct net *net, const void __user *user,
 	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len - size, numa_node_id());
+	paddc = vmalloc(len - size);
 	if (!paddc)
 		return -ENOMEM;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 63958f3394a5..7c0b8ad61f9d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -928,7 +928,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc_node(countersize, numa_node_id());
+	counters = vmalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1352,7 +1352,7 @@ do_add_counters(struct net *net, const void __user *user,
 	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len - size, numa_node_id());
+	paddc = vmalloc(len - size);
 	if (!paddc)
 		return -ENOMEM;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 6f517bd83692..82945ef6c9fc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -943,7 +943,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 	   (other than comefrom, which userspace doesn't care
 	   about). */
 	countersize = sizeof(struct xt_counters) * private->number;
-	counters = vmalloc_node(countersize, numa_node_id());
+	counters = vmalloc(countersize);
 
 	if (counters == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -1213,8 +1213,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	struct ip6t_entry *iter;
 
 	ret = 0;
-	counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
-				numa_node_id());
+	counters = vmalloc(num_counters * sizeof(struct xt_counters));
 	if (!counters) {
 		ret = -ENOMEM;
 		goto out;
@@ -1368,7 +1367,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
 	if (len != size + num_counters * sizeof(struct xt_counters))
 		return -EINVAL;
 
-	paddc = vmalloc_node(len - size, numa_node_id());
+	paddc = vmalloc(len - size);
 	if (!paddc)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-58-ga151


From 8764ab2ca7ab5055e1ca80f9cfa4970c34acb804 Mon Sep 17 00:00:00 2001
From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Fri, 4 Jun 2010 01:57:38 +0000
Subject: net: check for refcount if pop a stacked dst_entry

xfrm triggers a warning if dst_pop() drops a refcount
on a noref dst. This patch changes dst_pop() to
skb_dst_pop(). skb_dst_pop() drops the refcnt only
on a refcounted dst. Also we don't clone the child
dst_entry, so it is not refcounted and we can use
skb_dst_set_noref() in xfrm_output_one().

Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h      | 6 +++---
 net/xfrm/xfrm_output.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 612069beda73..81d1413a8701 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -250,11 +250,11 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
  * Linux networking.  Thus, destinations are stackable.
  */
 
-static inline struct dst_entry *dst_pop(struct dst_entry *dst)
+static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb)
 {
-	struct dst_entry *child = dst_clone(dst->child);
+	struct dst_entry *child = skb_dst(skb)->child;
 
-	dst_release(dst);
+	skb_dst_drop(skb);
 	return child;
 }
 
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 6a329158bdfa..a3cca0a94346 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -95,13 +95,13 @@ resume:
 			goto error_nolock;
 		}
 
-		dst = dst_pop(dst);
+		dst = skb_dst_pop(skb);
 		if (!dst) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
 			err = -EHOSTUNREACH;
 			goto error_nolock;
 		}
-		skb_dst_set(skb, dst);
+		skb_dst_set_noref(skb, dst);
 		x = dst->xfrm;
 	} while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL));
 
-- 
cgit v1.2.3-58-ga151


From 57f1553ee5d9f093660cc49098f494e17ed11668 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2010 00:42:30 +0000
Subject: syncookies: remove Kconfig text line about disabled-by-default

syncookies default to on since
e994b7c901ded7200b525a707c6da71f2cf6d4bb
(tcp: Don't make syn cookies initial setting depend on CONFIG_SYSCTL).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 8e3a1fd938ab..7c3a7d191249 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -303,7 +303,7 @@ config ARPD
 	  If unsure, say N.
 
 config SYN_COOKIES
-	bool "IP: TCP syncookie support (disabled per default)"
+	bool "IP: TCP syncookie support"
 	---help---
 	  Normal TCP/IP networking is open to an attack known as "SYN
 	  flooding". This denial-of-service attack prevents legitimate remote
@@ -328,13 +328,13 @@ config SYN_COOKIES
 	  server is really overloaded. If this happens frequently better turn
 	  them off.
 
-	  If you say Y here, note that SYN cookies aren't enabled by default;
-	  you can enable them by saying Y to "/proc file system support" and
+	  If you say Y here, you can disable SYN cookies at run time by
+	  saying Y to "/proc file system support" and
 	  "Sysctl support" below and executing the command
 
-	  echo 1 >/proc/sys/net/ipv4/tcp_syncookies
+	  echo 0 > /proc/sys/net/ipv4/tcp_syncookies
 
-	  at boot time after the /proc file system has been mounted.
+	  after the /proc file system has been mounted.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3-58-ga151


From ca55158c6ecb7832a6ad80ac44a14d23bab8cdfc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 09:03:58 +0000
Subject: rps: tcp: fix rps_sock_flow_table table updates

I believe a moderate SYN flood attack can corrupt RFS flow table
(rps_sock_flow_table), making RPS/RFS much less effective.

Even in a normal situation, server handling short lived sessions suffer
from bad steering for the first data packet of a session, if another SYN
packet is received for another session.

We do following action in tcp_v4_rcv() :

	sock_rps_save_rxhash(sk, skb->rxhash);

We should _not_ do this if sk is a LISTEN socket, as about each
packet received on a LISTEN socket has a different rxhash than
previous one.
 -> RPS_NO_CPU markers are spread all over rps_sock_flow_table.

Also, it makes sense to protect sk->rxhash field changes with socket
lock (We currently can change it even if user thread owns the lock
and might use rxhash)

This patch moves sock_rps_save_rxhash() to a sock locked section,
and only for non LISTEN sockets.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 202cf09c4cd4..fe193e53af44 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1555,6 +1555,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 #endif
 
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
+		sock_rps_save_rxhash(sk, skb->rxhash);
 		TCP_CHECK_TIMER(sk);
 		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
 			rsk = sk;
@@ -1579,7 +1580,9 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 			}
 			return 0;
 		}
-	}
+	} else
+		sock_rps_save_rxhash(sk, skb->rxhash);
+
 
 	TCP_CHECK_TIMER(sk);
 	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
@@ -1672,8 +1675,6 @@ process:
 
 	skb->dev = NULL;
 
-	sock_rps_save_rxhash(sk, skb->rxhash);
-
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
-- 
cgit v1.2.3-58-ga151


From c44649216522cd607a4027d2ebf4a8147d3fa94c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 05:45:47 +0000
Subject: tcp: use correct net ns in cookie_v4_check()

Its better to make a route lookup in appropriate namespace.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c24db4a3c91..9f6b22206c52 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -347,7 +347,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 					       { .sport = th->dest,
 						 .dport = th->source } } };
 		security_req_classify_flow(req, &fl);
-		if (ip_route_output_key(&init_net, &rt, &fl)) {
+		if (ip_route_output_key(sock_net(sk), &rt, &fl)) {
 			reqsk_free(req);
 			goto out;
 		}
-- 
cgit v1.2.3-58-ga151


From 2a1d4bd46047efff513600d7ff422bc344f540a6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2010 00:43:12 +0000
Subject: syncookies: make v4/v6 synflood warning behaviour the same

both syn_flood_warning functions print a message, but
ipv4 version only prints a warning if CONFIG_SYN_COOKIES=y.

Make the v4 one behave like the v6 one.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 202cf09c4cd4..a13f881e5037 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -793,19 +793,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
 	kfree(inet_rsk(req)->opt);
 }
 
-#ifdef CONFIG_SYN_COOKIES
-static void syn_flood_warning(struct sk_buff *skb)
+static void syn_flood_warning(const struct sk_buff *skb)
 {
-	static unsigned long warntime;
+	const char *msg;
 
-	if (time_after(jiffies, (warntime + HZ * 60))) {
-		warntime = jiffies;
-		printk(KERN_INFO
-		       "possible SYN flooding on port %d. Sending cookies.\n",
-		       ntohs(tcp_hdr(skb)->dest));
-	}
-}
+#ifdef CONFIG_SYN_COOKIES
+	if (sysctl_tcp_syncookies)
+		msg = "Sending cookies";
+	else
 #endif
+		msg = "Dropping request";
+
+	pr_info("TCP: Possible SYN flooding on port %d. %s.\n",
+				ntohs(tcp_hdr(skb)->dest), msg);
+}
 
 /*
  * Save and compile IPv4 options into the request_sock if needed.
@@ -1243,6 +1244,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * evidently real one.
 	 */
 	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
+		if (net_ratelimit())
+			syn_flood_warning(skb);
 #ifdef CONFIG_SYN_COOKIES
 		if (sysctl_tcp_syncookies) {
 			want_cookie = 1;
@@ -1328,7 +1331,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	if (want_cookie) {
 #ifdef CONFIG_SYN_COOKIES
-		syn_flood_warning(skb);
 		req->cookie_ts = tmp_opt.tstamp_ok;
 #endif
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
-- 
cgit v1.2.3-58-ga151


From af9b4738574b46025de7ccbe75c7b24fd8914379 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2010 00:43:44 +0000
Subject: syncookies: avoid unneeded tcp header flag double check

caller: if (!th->rst && !th->syn && th->ack)
callee: if (!th->ack)

make the caller only check for !syn (common for 3whs), and move
the !rst / ack test to the callee.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 2 +-
 net/ipv4/tcp_ipv4.c   | 2 +-
 net/ipv6/syncookies.c | 2 +-
 net/ipv6/tcp_ipv6.c   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c24db4a3c91..c9dac8615958 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -266,7 +266,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	struct rtable *rt;
 	__u8 rcv_wscale;
 
-	if (!sysctl_tcp_syncookies || !th->ack)
+	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
 
 	if (tcp_synq_no_recent_overflow(sk) ||
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a13f881e5037..6558dfd899da 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1506,7 +1506,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 	}
 
 #ifdef CONFIG_SYN_COOKIES
-	if (!th->rst && !th->syn && th->ack)
+	if (!th->syn)
 		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
 #endif
 	return sk;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 12383705dbad..9fcb3ec7c838 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -174,7 +174,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
 
-	if (!sysctl_tcp_syncookies || !th->ack)
+	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
 
 	if (tcp_synq_no_recent_overflow(sk) ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e487080d02db..5887141ad641 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1157,7 +1157,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
 	}
 
 #ifdef CONFIG_SYN_COOKIES
-	if (!th->rst && !th->syn && th->ack)
+	if (!th->syn)
 		sk = cookie_v6_check(sk, skb);
 #endif
 	return sk;
-- 
cgit v1.2.3-58-ga151


From 5918e2fb9035b35164002c3a268feaf70b7c04d5 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 3 Jun 2010 00:43:57 +0000
Subject: syncookies: update mss tables

- ipv6 msstab: account for ipv6 header size
- ipv4 msstab: add mss for Jumbograms.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 38 +++++++++++++++++++-------------------
 net/ipv6/syncookies.c | 39 +++++++++++++++------------------------
 2 files changed, 34 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c9dac8615958..a7cbcc4b726b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -138,23 +138,23 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, __be32 saddr, __be32 daddr,
 }
 
 /*
- * This table has to be sorted and terminated with (__u16)-1.
- * XXX generate a better table.
- * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
+ * MSS Values are taken from the 2009 paper
+ * 'Measuring TCP Maximum Segment Size' by S. Alcock and R. Nelson:
+ *  - values 1440 to 1460 accounted for 80% of observed mss values
+ *  - values outside the 536-1460 range are rare (<0.2%).
+ *
+ * Table must be sorted.
  */
 static __u16 const msstab[] = {
-	64 - 1,
-	256 - 1,
-	512 - 1,
-	536 - 1,
-	1024 - 1,
-	1440 - 1,
-	1460 - 1,
-	4312 - 1,
-	(__u16)-1
+	64,
+	512,
+	536,
+	1024,
+	1440,
+	1460,
+	4312,
+	8960,
 };
-/* The number doesn't include the -1 terminator */
-#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
 
 /*
  * Generate a syncookie.  mssp points to the mss, which is returned
@@ -169,10 +169,10 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 
 	tcp_synq_overflow(sk);
 
-	/* XXX sort msstab[] by probability?  Binary search? */
-	for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
-		;
-	*mssp = msstab[mssind] + 1;
+	for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
+		if (mss >= msstab[mssind])
+			break;
+	*mssp = msstab[mssind];
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
 
@@ -202,7 +202,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 					    jiffies / (HZ * 60),
 					    COUNTER_TRIES);
 
-	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
+	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
 }
 
 static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 9fcb3ec7c838..70d330f8c990 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -27,28 +27,17 @@ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
 #define COOKIEBITS 24	/* Upper bits store count */
 #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
 
-/*
- * This table has to be sorted and terminated with (__u16)-1.
- * XXX generate a better table.
- * Unresolved Issues: HIPPI with a 64k MSS is not well supported.
- *
- * Taken directly from ipv4 implementation.
- * Should this list be modified for ipv6 use or is it close enough?
- * rfc 2460 8.3 suggests mss values 20 bytes less than ipv4 counterpart
- */
+/* Table must be sorted. */
 static __u16 const msstab[] = {
-	64 - 1,
-	256 - 1,
-	512 - 1,
-	536 - 1,
-	1024 - 1,
-	1440 - 1,
-	1460 - 1,
-	4312 - 1,
-	(__u16)-1
+	64,
+	512,
+	536,
+	1280 - 60,
+	1480 - 60,
+	1500 - 60,
+	4460 - 60,
+	9000 - 60,
 };
-/* The number doesn't include the -1 terminator */
-#define NUM_MSS (ARRAY_SIZE(msstab) - 1)
 
 /*
  * This (misnamed) value is the age of syncookie which is permitted.
@@ -134,9 +123,11 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
 
 	tcp_synq_overflow(sk);
 
-	for (mssind = 0; mss > msstab[mssind + 1]; mssind++)
-		;
-	*mssp = msstab[mssind] + 1;
+	for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
+		if (mss >= msstab[mssind])
+			break;
+
+	*mssp = msstab[mssind];
 
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
 
@@ -154,7 +145,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
 					    th->source, th->dest, seq,
 					    jiffies / (HZ * 60), COUNTER_TRIES);
 
-	return mssind < NUM_MSS ? msstab[mssind] + 1 : 0;
+	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
 }
 
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3-58-ga151


From b78462ebc6a4ef9074aa80abebcdd470dc5f0ce0 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 2 Jun 2010 12:24:37 +0000
Subject: skbuff: add check for non-linear to warn_if_lro and needs_linearize

We can avoid an unecessary cache miss by checking if the skb is non-linear
before accessing gso_size/gso_type in skb_warn_if_lro, the same can also be
done to avoid a cache miss on nr_frags if data_len is 0.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 3 ++-
 net/core/dev.c         | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bf243fc54959..645e78d395fd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2129,7 +2129,8 @@ static inline bool skb_warn_if_lro(const struct sk_buff *skb)
 	/* LRO sets gso_size but not gso_type, whereas if GSO is really
 	 * wanted then gso_type will be set. */
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
-	if (shinfo->gso_size != 0 && unlikely(shinfo->gso_type == 0)) {
+	if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
+	    unlikely(shinfo->gso_type == 0)) {
 		__skb_warn_lro_forwarding(skb);
 		return true;
 	}
diff --git a/net/core/dev.c b/net/core/dev.c
index ec01a5998d70..3abb3a6058be 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2103,9 +2103,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 static inline int skb_needs_linearize(struct sk_buff *skb,
 				      struct net_device *dev)
 {
-	return (skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-	       (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
-					      illegal_highdma(dev, skb)));
+	return skb_is_nonlinear(skb) &&
+	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+					      illegal_highdma(dev, skb))));
 }
 
 /**
-- 
cgit v1.2.3-58-ga151


From 72e09ad107e78d69ff4d3b97a69f0aad2b77280f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 5 Jun 2010 03:03:30 -0700
Subject: ipv6: avoid high order allocations

With mtu=9000, mld_newpack() use order-2 GFP_ATOMIC allocations, that
are very unreliable, on machines where PAGE_SIZE=4K

Limit allocated skbs to be at most one page. (order-0 allocations)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 59f1881968c7..ab1622d7d409 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1356,7 +1356,10 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 		     IPV6_TLV_PADN, 0 };
 
 	/* we assume size > sizeof(ra) here */
-	skb = sock_alloc_send_skb(sk, size + LL_ALLOCATED_SPACE(dev), 1, &err);
+	size += LL_ALLOCATED_SPACE(dev);
+	/* limit our allocations to order-0 page */
+	size = min_t(int, size, SKB_MAX_ORDER(0, 0));
+	skb = sock_alloc_send_skb(sk, size, 1, &err);
 
 	if (!skb)
 		return NULL;
-- 
cgit v1.2.3-58-ga151


From 8ffb335e8d696affc04f963bf73ce2196f80edb9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 6 Jun 2010 15:34:40 -0700
Subject: ip6mr: fix a typo in ip6mr_for_each_table()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 073071f2b75b..89c0b077c7aa 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -120,7 +120,7 @@ static void mroute_clean_tables(struct mr6_table *mrt);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
-#define ip6mr_for_each_table(mrt, met) \
+#define ip6mr_for_each_table(mrt, net) \
 	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
 
 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
-- 
cgit v1.2.3-58-ga151


From a8b690f98baf9fb1902b8eeab801351ea603fa3a Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 7 Jun 2010 00:43:42 -0700
Subject: tcp: Fix slowness in read /proc/net/tcp

This patch address a serious performance issue in reading the
TCP sockets table (/proc/net/tcp).

Reading the full table is done by a number of sequential read
operations.  At each read operation, a seek is done to find the
last socket that was previously read.  This seek operation requires
that the sockets in the table need to be counted up to the current
file position, and to count each of these requires taking a lock for
each non-empty bucket.  The whole algorithm is O(n^2).

The fix is to cache the last bucket value, offset within the bucket,
and the file position returned by the last read operation.   On the
next sequential read, the bucket and offset are used to find the
last read socket immediately without needing ot scan the previous
buckets  the table.  This algorithm t read the whole table is O(n).

The improvement offered by this patch is easily show by performing
cat'ing /proc/net/tcp on a machine with a lot of connections.  With
about 182K connections in the table, I see the following:

- Without patch
time cat /proc/net/tcp > /dev/null

real	1m56.729s
user	0m0.214s
sys	1m56.344s

- With patch
time cat /proc/net/tcp > /dev/null

real	0m0.894s
user	0m0.290s
sys	0m0.594s

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   |  3 +-
 net/ipv4/tcp_ipv4.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 86 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a1449144848a..573166484413 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1413,7 +1413,8 @@ struct tcp_iter_state {
 	sa_family_t		family;
 	enum tcp_seq_states	state;
 	struct sock		*syn_wait_sk;
-	int			bucket, sbucket, num, uid;
+	int			bucket, offset, sbucket, num, uid;
+	loff_t			last_pos;
 };
 
 extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index acdc4c989853..7f976af27bf0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1980,6 +1980,11 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
 		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
 }
 
+/*
+ * Get next listener socket follow cur.  If cur is NULL, get first socket
+ * starting from bucket given in st->bucket; when st->bucket is zero the
+ * very first socket in the hash table is returned.
+ */
 static void *listening_get_next(struct seq_file *seq, void *cur)
 {
 	struct inet_connection_sock *icsk;
@@ -1990,14 +1995,15 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);
 
 	if (!sk) {
-		st->bucket = 0;
-		ilb = &tcp_hashinfo.listening_hash[0];
+		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
 		sk = sk_nulls_head(&ilb->head);
+		st->offset = 0;
 		goto get_sk;
 	}
 	ilb = &tcp_hashinfo.listening_hash[st->bucket];
 	++st->num;
+	++st->offset;
 
 	if (st->state == TCP_SEQ_STATE_OPENREQ) {
 		struct request_sock *req = cur;
@@ -2012,6 +2018,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
 				}
 				req = req->dl_next;
 			}
+			st->offset = 0;
 			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
 				break;
 get_req:
@@ -2047,6 +2054,7 @@ start_req:
 		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
 	}
 	spin_unlock_bh(&ilb->lock);
+	st->offset = 0;
 	if (++st->bucket < INET_LHTABLE_SIZE) {
 		ilb = &tcp_hashinfo.listening_hash[st->bucket];
 		spin_lock_bh(&ilb->lock);
@@ -2060,7 +2068,12 @@ out:
 
 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 {
-	void *rc = listening_get_next(seq, NULL);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	st->offset = 0;
+	rc = listening_get_next(seq, NULL);
 
 	while (rc && *pos) {
 		rc = listening_get_next(seq, rc);
@@ -2075,13 +2088,18 @@ static inline int empty_bucket(struct tcp_iter_state *st)
 		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
 }
 
+/*
+ * Get first established socket starting from bucket given in st->bucket.
+ * If st->bucket is zero, the very first socket in the hash is returned.
+ */
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state *st = seq->private;
 	struct net *net = seq_file_net(seq);
 	void *rc = NULL;
 
-	for (st->bucket = 0; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
+	st->offset = 0;
+	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
@@ -2126,6 +2144,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 	struct net *net = seq_file_net(seq);
 
 	++st->num;
+	++st->offset;
 
 	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
 		tw = cur;
@@ -2142,6 +2161,7 @@ get_tw:
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		/* Look for next non empty bucket */
+		st->offset = 0;
 		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
 				empty_bucket(st))
 			;
@@ -2169,7 +2189,11 @@ out:
 
 static void *established_get_idx(struct seq_file *seq, loff_t pos)
 {
-	void *rc = established_get_first(seq);
+	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	st->bucket = 0;
+	rc = established_get_first(seq);
 
 	while (rc && pos) {
 		rc = established_get_next(seq, rc);
@@ -2194,24 +2218,72 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	return rc;
 }
 
+static void *tcp_seek_last_pos(struct seq_file *seq)
+{
+	struct tcp_iter_state *st = seq->private;
+	int offset = st->offset;
+	int orig_num = st->num;
+	void *rc = NULL;
+
+	switch (st->state) {
+	case TCP_SEQ_STATE_OPENREQ:
+	case TCP_SEQ_STATE_LISTENING:
+		if (st->bucket >= INET_LHTABLE_SIZE)
+			break;
+		st->state = TCP_SEQ_STATE_LISTENING;
+		rc = listening_get_next(seq, NULL);
+		while (offset-- && rc)
+			rc = listening_get_next(seq, rc);
+		if (rc)
+			break;
+		st->bucket = 0;
+		/* Fallthrough */
+	case TCP_SEQ_STATE_ESTABLISHED:
+	case TCP_SEQ_STATE_TIME_WAIT:
+		st->state = TCP_SEQ_STATE_ESTABLISHED;
+		if (st->bucket > tcp_hashinfo.ehash_mask)
+			break;
+		rc = established_get_first(seq);
+		while (offset-- && rc)
+			rc = established_get_next(seq, rc);
+	}
+
+	st->num = orig_num;
+
+	return rc;
+}
+
 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct tcp_iter_state *st = seq->private;
+	void *rc;
+
+	if (*pos && *pos == st->last_pos) {
+		rc = tcp_seek_last_pos(seq);
+		if (rc)
+			goto out;
+	}
+
 	st->state = TCP_SEQ_STATE_LISTENING;
 	st->num = 0;
-	return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+	st->bucket = 0;
+	st->offset = 0;
+	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+
+out:
+	st->last_pos = *pos;
+	return rc;
 }
 
 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct tcp_iter_state *st = seq->private;
 	void *rc = NULL;
-	struct tcp_iter_state *st;
 
 	if (v == SEQ_START_TOKEN) {
 		rc = tcp_get_idx(seq, 0);
 		goto out;
 	}
-	st = seq->private;
 
 	switch (st->state) {
 	case TCP_SEQ_STATE_OPENREQ:
@@ -2219,6 +2291,8 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 		rc = listening_get_next(seq, v);
 		if (!rc) {
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
+			st->bucket = 0;
+			st->offset = 0;
 			rc	  = established_get_first(seq);
 		}
 		break;
@@ -2229,6 +2303,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	}
 out:
 	++*pos;
+	st->last_pos = *pos;
 	return rc;
 }
 
@@ -2267,6 +2342,7 @@ static int tcp_seq_open(struct inode *inode, struct file *file)
 
 	s = ((struct seq_file *)file->private_data)->private;
 	s->family		= afinfo->family;
+	s->last_pos 		= 0;
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From fe33147a58e7d1d3086bf823aabfd491d843be82 Mon Sep 17 00:00:00 2001
From: Alex Lorca <alex.lorca@gmail.com>
Date: Mon, 7 Jun 2010 01:01:22 -0700
Subject: net-caif: Added missing lock validator constants

CAIF is using "xxx-AF_MAX" strings for the lock validator. It should use
its own strings.

Signed-off-by: Alex Lorca <alex.lorca@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 2cf7f9f7e775..f9ce0db41cd6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -156,7 +156,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = {
   "sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
   "sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV"        ,
   "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
-  "sk_lock-AF_IEEE802154",
+  "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" ,
   "sk_lock-AF_MAX"
 };
 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
@@ -172,7 +172,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = {
   "slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
   "slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
   "slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
-  "slock-AF_IEEE802154",
+  "slock-AF_IEEE802154", "slock-AF_CAIF" ,
   "slock-AF_MAX"
 };
 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
@@ -188,7 +188,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = {
   "clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
   "clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
   "clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
-  "clock-AF_IEEE802154",
+  "clock-AF_IEEE802154", "clock-AF_CAIF" ,
   "clock-AF_MAX"
 };
 
-- 
cgit v1.2.3-58-ga151


From 1789a640f55658d9a54c1868cc3405e4d85dbd8e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Jun 2010 22:23:57 +0000
Subject: raw: avoid two atomics in xmit

Avoid two atomic ops per raw_send_hdrinc() call

Avoid two atomic ops per raw6_send_hdrinc() call

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/raw.c |  8 +++++---
 net/ipv6/raw.c | 12 +++++++-----
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2c7a1639388a..66cc3befcd44 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -314,7 +314,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
 }
 
 static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
-			struct rtable *rt,
+			struct rtable **rtp,
 			unsigned int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -323,6 +323,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	struct sk_buff *skb;
 	unsigned int iphlen;
 	int err;
+	struct rtable *rt = *rtp;
 
 	if (length > rt->u.dst.dev->mtu) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
@@ -341,7 +342,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, &rt->u.dst);
+	*rtp = NULL;
 
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
@@ -576,7 +578,7 @@ back_from_confirm:
 
 	if (inet->hdrincl)
 		err = raw_send_hdrinc(sk, msg->msg_iov, len,
-					rt, msg->msg_flags);
+					&rt, msg->msg_flags);
 
 	 else {
 		if (!ipc.addr)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 864eb8e03b1b..968b96490729 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -602,13 +602,14 @@ out:
 }
 
 static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
-			struct flowi *fl, struct rt6_info *rt,
+			struct flowi *fl, struct dst_entry **dstp,
 			unsigned int flags)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct ipv6hdr *iph;
 	struct sk_buff *skb;
 	int err;
+	struct rt6_info *rt = (struct rt6_info *)*dstp;
 
 	if (length > rt->u.dst.dev->mtu) {
 		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
@@ -626,7 +627,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, &rt->u.dst);
+	*dstp = NULL;
 
 	skb_put(skb, length);
 	skb_reset_network_header(skb);
@@ -886,9 +888,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		goto do_confirm;
 
 back_from_confirm:
-	if (inet->hdrincl) {
-		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
-	} else {
+	if (inet->hdrincl)
+		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, &dst, msg->msg_flags);
+	else {
 		lock_sock(sk);
 		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
 			len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
-- 
cgit v1.2.3-58-ga151


From f2a03367c072150c881fa23ce3d3f76b8236018f Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 4 Jun 2010 01:33:33 +0000
Subject: htb: remove two unnecessary assignments

remove two unnecessary assignments

we don't need to assign NULL when initialize structure objects.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/sched/sch_htb.c |    2 --
 1 file changed, 2 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_htb.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 0b52b8de562c..4be8d04b262d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1550,7 +1550,6 @@ static const struct Qdisc_class_ops htb_class_ops = {
 };
 
 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
-	.next		=	NULL,
 	.cl_ops		=	&htb_class_ops,
 	.id		=	"htb",
 	.priv_size	=	sizeof(struct htb_sched),
@@ -1561,7 +1560,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
 	.init		=	htb_init,
 	.reset		=	htb_reset,
 	.destroy	=	htb_destroy,
-	.change		=	NULL /* htb_change */,
 	.dump		=	htb_dump,
 	.owner		=	THIS_MODULE,
 };
-- 
cgit v1.2.3-58-ga151


From 9dacaf17a60101a55d456cc7b00e269d8145aa0d Mon Sep 17 00:00:00 2001
From: jamal <hadi@cyberus.ca>
Date: Fri, 4 Jun 2010 02:43:06 +0000
Subject: net sched: make pedit check for clones instead

Now that the core path doesnt set OK to munge we detect
writable skbs by looking to see if they are cloned.

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_pedit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 50e3d945e1f4..a0593c9640db 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -127,8 +127,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 	int i, munged = 0;
 	unsigned int off;
 
-	if (!(skb->tc_verd & TC_OK2MUNGE)) {
-		/* should we set skb->cloned? */
+	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
 			return p->tcf_action;
 		}
-- 
cgit v1.2.3-58-ga151


From 271c1dfa61bc90a57648ff96f3eb92d4b4d4f11e Mon Sep 17 00:00:00 2001
From: jamal <hadi@cyberus.ca>
Date: Fri, 4 Jun 2010 02:06:22 +0000
Subject: net: Remove unnecessary net action assertion

The extra assertion to allow packet munging only when there are
no other ptypes listening which may have worked around an old bug
is unnecessary. It is sufficient to check if the skb is cloned before
trampling on it. Thanks to Herbert Xu for being persistent and patient
in getting this across.
[Note that cloning checks and assertions are the general rule used
by tc actions (documentation/networking/tc-actions-env-rules.txt)].

Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index b65347c2cf2a..c8d127718ff1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2663,9 +2663,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
-	} else {
-		/* Huh? Why does turning on AF_PACKET affect this? */
-		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
 	}
 
 	switch (ing_filter(skb)) {
-- 
cgit v1.2.3-58-ga151


From 035320d54758e21227987e3aae0d46e7a04f4ddc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 6 Jun 2010 23:48:40 +0000
Subject: ipmr: dont corrupt lists

ipmr_rules_exit() and ip6mr_rules_exit() free a list of items, but
forget to properly remove these items from list. List head is not
changed and still points to freed memory.

This can trigger a fault later when icmpv6_sk_exit() is called.

Fix is to either reinit list, or use list_del() to properly remove items
from list before freeing them.

bugzilla report : https://bugzilla.kernel.org/show_bug.cgi?id=16120

Introduced by commit d1db275dd3f6e4 (ipv6: ip6mr: support multiple
tables) and commit f0ad0860d01e (ipv4: ipmr: support multiple tables)

Reported-by: Alex Zhavnerchik <alex.vizor@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c  | 4 +++-
 net/ipv6/ip6mr.c | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 856123fe32f9..757f25eb9b4b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -267,8 +267,10 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 {
 	struct mr_table *mrt, *next;
 
-	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
+	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
+		list_del(&mrt->list);
 		kfree(mrt);
+	}
 	fib_rules_unregister(net->ipv4.mr_rules_ops);
 }
 #else
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 89c0b077c7aa..66078dad7fe8 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -254,8 +254,10 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
 {
 	struct mr6_table *mrt, *next;
 
-	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list)
+	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
+		list_del(&mrt->list);
 		ip6mr_free_table(mrt);
+	}
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
 }
 #else
-- 
cgit v1.2.3-58-ga151


From 66018506e15bea62de4eefc3298f170b4bfcf5ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 03:12:08 +0000
Subject: ip: Router Alert RCU conversion

Straightforward conversion to RCU.

One rwlock becomes a spinlock, and is static.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  2 +-
 net/ipv4/ip_input.c    | 11 +++--------
 net/ipv4/ip_sockglue.c | 23 ++++++++++++++---------
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 452f229c380a..9982c97f0bdc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -62,10 +62,10 @@ struct ip_ra_chain {
 	struct ip_ra_chain	*next;
 	struct sock		*sk;
 	void			(*destructor)(struct sock *);
+	struct rcu_head		rcu;
 };
 
 extern struct ip_ra_chain *ip_ra_chain;
-extern rwlock_t ip_ra_lock;
 
 /* IP flags. */
 #define IP_CE		0x8000		/* Flag: "Congestion"		*/
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d52c9da644cf..d274078b1665 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,7 +146,7 @@
 #include <linux/netlink.h>
 
 /*
- *	Process Router Attention IP option
+ *	Process Router Attention IP option (RFC 2113)
  */
 int ip_call_ra_chain(struct sk_buff *skb)
 {
@@ -155,8 +155,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
 	struct sock *last = NULL;
 	struct net_device *dev = skb->dev;
 
-	read_lock(&ip_ra_lock);
-	for (ra = ip_ra_chain; ra; ra = ra->next) {
+	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
 		struct sock *sk = ra->sk;
 
 		/* If socket is bound to an interface, only report
@@ -167,10 +166,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 		     sk->sk_bound_dev_if == dev->ifindex) &&
 		    net_eq(sock_net(sk), dev_net(dev))) {
 			if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
-				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
-					read_unlock(&ip_ra_lock);
+				if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
 					return 1;
-				}
 			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -183,10 +180,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
 
 	if (last) {
 		raw_rcv(last, skb);
-		read_unlock(&ip_ra_lock);
 		return 1;
 	}
-	read_unlock(&ip_ra_lock);
 	return 0;
 }
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce231780a2b1..08b9519a24f4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,12 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
    sent to multicast group to reach destination designated router.
  */
 struct ip_ra_chain *ip_ra_chain;
-DEFINE_RWLOCK(ip_ra_lock);
+static DEFINE_SPINLOCK(ip_ra_lock);
+
+static void ip_ra_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct ip_ra_chain, rcu));
+}
 
 int ip_ra_control(struct sock *sk, unsigned char on,
 		  void (*destructor)(struct sock *))
@@ -251,35 +256,35 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 
 	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
-	write_lock_bh(&ip_ra_lock);
+	spin_lock_bh(&ip_ra_lock);
 	for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
 		if (ra->sk == sk) {
 			if (on) {
-				write_unlock_bh(&ip_ra_lock);
+				spin_unlock_bh(&ip_ra_lock);
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
-			*rap = ra->next;
-			write_unlock_bh(&ip_ra_lock);
+			rcu_assign_pointer(*rap, ra->next);
+			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
 			sock_put(sk);
-			kfree(ra);
+			call_rcu(&ra->rcu, ip_ra_free_rcu);
 			return 0;
 		}
 	}
 	if (new_ra == NULL) {
-		write_unlock_bh(&ip_ra_lock);
+		spin_unlock_bh(&ip_ra_lock);
 		return -ENOBUFS;
 	}
 	new_ra->sk = sk;
 	new_ra->destructor = destructor;
 
 	new_ra->next = ra;
-	*rap = new_ra;
+	rcu_assign_pointer(*rap, new_ra);
 	sock_hold(sk);
-	write_unlock_bh(&ip_ra_lock);
+	spin_unlock_bh(&ip_ra_lock);
 
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From 9a57a9d291980302b4a3184fbc47dbddac71903e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 03:17:10 +0000
Subject: igmp: avoid two atomic ops in igmp_rcv()

in_dev_get() -> __in_dev_get_rcu() in a rcu protected function.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 250cb5e1af48..3294f547c481 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -916,18 +916,19 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 	read_unlock(&in_dev->mc_list_lock);
 }
 
+/* called in rcu_read_lock() section */
 int igmp_rcv(struct sk_buff *skb)
 {
 	/* This basically follows the spec line by line -- see RFC1112 */
 	struct igmphdr *ih;
-	struct in_device *in_dev = in_dev_get(skb->dev);
+	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
 	int len = skb->len;
 
 	if (in_dev == NULL)
 		goto drop;
 
 	if (!pskb_may_pull(skb, sizeof(struct igmphdr)))
-		goto drop_ref;
+		goto drop;
 
 	switch (skb->ip_summed) {
 	case CHECKSUM_COMPLETE:
@@ -937,7 +938,7 @@ int igmp_rcv(struct sk_buff *skb)
 	case CHECKSUM_NONE:
 		skb->csum = 0;
 		if (__skb_checksum_complete(skb))
-			goto drop_ref;
+			goto drop;
 	}
 
 	ih = igmp_hdr(skb);
@@ -957,7 +958,6 @@ int igmp_rcv(struct sk_buff *skb)
 		break;
 	case IGMP_PIM:
 #ifdef CONFIG_IP_PIMSM_V1
-		in_dev_put(in_dev);
 		return pim_rcv_v1(skb);
 #endif
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
@@ -971,8 +971,6 @@ int igmp_rcv(struct sk_buff *skb)
 		break;
 	}
 
-drop_ref:
-	in_dev_put(in_dev);
 drop:
 	kfree_skb(skb);
 	return 0;
-- 
cgit v1.2.3-58-ga151


From ed7865a47d4759e85bbd7bed44bee411d868eaad Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 21:49:44 -0700
Subject: ipv4: avoid two atomic ops in ip_rt_redirect()

in_dev_get() -> __in_dev_get_rcu() in a rcu protected function.

[ Fix build with CONFIG_IP_ROUTE_VERBOSE disabled. -DaveM ]

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/route.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7b8eacd5ac26..883b5c7195ac 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1343,11 +1343,12 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
+/* called in rcu_read_lock() section */
 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 		    __be32 saddr, struct net_device *dev)
 {
 	int i, k;
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev = __in_dev_get_rcu(dev);
 	struct rtable *rth, **rthp;
 	__be32  skeys[2] = { saddr, 0 };
 	int  ikeys[2] = { dev->ifindex, 0 };
@@ -1383,7 +1384,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 			rthp=&rt_hash_table[hash].chain;
 
-			rcu_read_lock();
 			while ((rth = rcu_dereference(*rthp)) != NULL) {
 				struct rtable *rt;
 
@@ -1405,12 +1405,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 					break;
 
 				dst_hold(&rth->u.dst);
-				rcu_read_unlock();
 
 				rt = dst_alloc(&ipv4_dst_ops);
 				if (rt == NULL) {
 					ip_rt_put(rth);
-					in_dev_put(in_dev);
 					return;
 				}
 
@@ -1463,12 +1461,10 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 					ip_rt_put(rt);
 				goto do_next;
 			}
-			rcu_read_unlock();
 		do_next:
 			;
 		}
 	}
-	in_dev_put(in_dev);
 	return;
 
 reject_redirect:
@@ -1479,7 +1475,7 @@ reject_redirect:
 		       &old_gw, dev->name, &new_gw,
 		       &saddr, &daddr);
 #endif
-	in_dev_put(in_dev);
+	;
 }
 
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
-- 
cgit v1.2.3-58-ga151


From 6e8b11b43b0c2e901734e2cdd70c6e325a90c4ef Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 03:54:46 +0000
Subject: net: avoid two atomic ops in ip_rcv_options()

in_dev_get() -> __in_dev_get_rcu() in a rcu protected function.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_input.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index d274078b1665..08a3b121f908 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -293,18 +293,16 @@ static inline int ip_rcv_options(struct sk_buff *skb)
 	}
 
 	if (unlikely(opt->srr)) {
-		struct in_device *in_dev = in_dev_get(dev);
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
+
 		if (in_dev) {
 			if (!IN_DEV_SOURCE_ROUTE(in_dev)) {
 				if (IN_DEV_LOG_MARTIANS(in_dev) &&
 				    net_ratelimit())
 					printk(KERN_INFO "source route option %pI4 -> %pI4\n",
 					       &iph->saddr, &iph->daddr);
-				in_dev_put(in_dev);
 				goto drop;
 			}
-
-			in_dev_put(in_dev);
 		}
 
 		if (ip_options_rcv_srr(skb))
-- 
cgit v1.2.3-58-ga151


From bb69ae049fcc986fcd742eb90ca0d44a7a49c9f1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 11:42:13 +0000
Subject: anycast: Some RCU conversions

- dev_get_by_flags() changed to dev_get_by_flags_rcu()

- ipv6_sock_ac_join() dont touch dev & idev refcounts
- ipv6_sock_ac_drop() dont touch dev & idev refcounts
- ipv6_sock_ac_close() dont touch dev & idev refcounts
- ipv6_dev_ac_dec() dount touch idev refcount
- ipv6_chk_acast_addr() dont touch idev refcount

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 +--
 net/core/dev.c            | 14 +++-----
 net/ipv6/anycast.c        | 90 ++++++++++++++++++++++-------------------------
 3 files changed, 49 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5156b806924c..c319f28d699d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1271,8 +1271,8 @@ extern void		dev_add_pack(struct packet_type *pt);
 extern void		dev_remove_pack(struct packet_type *pt);
 extern void		__dev_remove_pack(struct packet_type *pt);
 
-extern struct net_device	*dev_get_by_flags(struct net *net, unsigned short flags,
-						  unsigned short mask);
+extern struct net_device	*dev_get_by_flags_rcu(struct net *net, unsigned short flags,
+						      unsigned short mask);
 extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
 extern struct net_device	*dev_get_by_name_rcu(struct net *net, const char *name);
 extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
diff --git a/net/core/dev.c b/net/core/dev.c
index c8d127718ff1..6f330cee79a6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -803,35 +803,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 /**
- *	dev_get_by_flags - find any device with given flags
+ *	dev_get_by_flags_rcu - find any device with given flags
  *	@net: the applicable net namespace
  *	@if_flags: IFF_* values
  *	@mask: bitmask of bits in if_flags to check
  *
  *	Search for any interface with the given flags. Returns NULL if a device
- *	is not found or a pointer to the device. The device returned has
- *	had a reference added and the pointer is safe until the user calls
- *	dev_put to indicate they have finished with it.
+ *	is not found or a pointer to the device. Must be called inside
+ *	rcu_read_lock(), and result refcount is unchanged.
  */
 
-struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags,
+struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
 				    unsigned short mask)
 {
 	struct net_device *dev, *ret;
 
 	ret = NULL;
-	rcu_read_lock();
 	for_each_netdev_rcu(net, dev) {
 		if (((dev->flags ^ if_flags) & mask) == 0) {
-			dev_hold(dev);
 			ret = dev;
 			break;
 		}
 	}
-	rcu_read_unlock();
 	return ret;
 }
-EXPORT_SYMBOL(dev_get_by_flags);
+EXPORT_SYMBOL(dev_get_by_flags_rcu);
 
 /**
  *	dev_valid_name - check if name is okay for network device
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index b5b07054508a..f058fbd808c8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -77,41 +77,40 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 	pac->acl_next = NULL;
 	ipv6_addr_copy(&pac->acl_addr, addr);
 
+	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dev_hold(dev);
 			dst_release(&rt->u.dst);
 		} else if (ishost) {
 			err = -EADDRNOTAVAIL;
-			goto out_free_pac;
+			goto error;
 		} else {
 			/* router, no matching interface: just pick one */
-
-			dev = dev_get_by_flags(net, IFF_UP, IFF_UP|IFF_LOOPBACK);
+			dev = dev_get_by_flags_rcu(net, IFF_UP,
+						   IFF_UP | IFF_LOOPBACK);
 		}
 	} else
-		dev = dev_get_by_index(net, ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (dev == NULL) {
 		err = -ENODEV;
-		goto out_free_pac;
+		goto error;
 	}
 
-	idev = in6_dev_get(dev);
+	idev = __in6_dev_get(dev);
 	if (!idev) {
 		if (ifindex)
 			err = -ENODEV;
 		else
 			err = -EADDRNOTAVAIL;
-		goto out_dev_put;
+		goto error;
 	}
 	/* reset ishost, now that we have a specific device */
 	ishost = !idev->cnf.forwarding;
-	in6_dev_put(idev);
 
 	pac->acl_ifindex = dev->ifindex;
 
@@ -124,26 +123,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		if (ishost)
 			err = -EADDRNOTAVAIL;
 		if (err)
-			goto out_dev_put;
+			goto error;
 	}
 
 	err = ipv6_dev_ac_inc(dev, addr);
-	if (err)
-		goto out_dev_put;
-
-	write_lock_bh(&ipv6_sk_ac_lock);
-	pac->acl_next = np->ipv6_ac_list;
-	np->ipv6_ac_list = pac;
-	write_unlock_bh(&ipv6_sk_ac_lock);
-
-	dev_put(dev);
-
-	return 0;
+	if (!err) {
+		write_lock_bh(&ipv6_sk_ac_lock);
+		pac->acl_next = np->ipv6_ac_list;
+		np->ipv6_ac_list = pac;
+		write_unlock_bh(&ipv6_sk_ac_lock);
+		pac = NULL;
+	}
 
-out_dev_put:
-	dev_put(dev);
-out_free_pac:
-	sock_kfree_s(sk, pac, sizeof(*pac));
+error:
+	rcu_read_unlock();
+	if (pac)
+		sock_kfree_s(sk, pac, sizeof(*pac));
 	return err;
 }
 
@@ -176,11 +171,12 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
 
 	write_unlock_bh(&ipv6_sk_ac_lock);
 
-	dev = dev_get_by_index(net, pac->acl_ifindex);
-	if (dev) {
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
+	if (dev)
 		ipv6_dev_ac_dec(dev, &pac->acl_addr);
-		dev_put(dev);
-	}
+	rcu_read_unlock();
+
 	sock_kfree_s(sk, pac, sizeof(*pac));
 	return 0;
 }
@@ -199,13 +195,12 @@ void ipv6_sock_ac_close(struct sock *sk)
 	write_unlock_bh(&ipv6_sk_ac_lock);
 
 	prev_index = 0;
+	rcu_read_lock();
 	while (pac) {
 		struct ipv6_ac_socklist *next = pac->acl_next;
 
 		if (pac->acl_ifindex != prev_index) {
-			if (dev)
-				dev_put(dev);
-			dev = dev_get_by_index(net, pac->acl_ifindex);
+			dev = dev_get_by_index_rcu(net, pac->acl_ifindex);
 			prev_index = pac->acl_ifindex;
 		}
 		if (dev)
@@ -213,8 +208,7 @@ void ipv6_sock_ac_close(struct sock *sk)
 		sock_kfree_s(sk, pac, sizeof(*pac));
 		pac = next;
 	}
-	if (dev)
-		dev_put(dev);
+	rcu_read_unlock();
 }
 
 #if 0
@@ -363,33 +357,32 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	return 0;
 }
 
+/* called with rcu_read_lock() */
 static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
 {
-	int ret;
-	struct inet6_dev *idev = in6_dev_get(dev);
+	struct inet6_dev *idev = __in6_dev_get(dev);
+
 	if (idev == NULL)
 		return -ENODEV;
-	ret = __ipv6_dev_ac_dec(idev, addr);
-	in6_dev_put(idev);
-	return ret;
+	return __ipv6_dev_ac_dec(idev, addr);
 }
 
 /*
  *	check if the interface has this anycast address
+ *	called with rcu_read_lock()
  */
 static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
 {
 	struct inet6_dev *idev;
 	struct ifacaddr6 *aca;
 
-	idev = in6_dev_get(dev);
+	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
 		for (aca = idev->ac_list; aca; aca = aca->aca_next)
 			if (ipv6_addr_equal(&aca->aca_addr, addr))
 				break;
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 		return aca != NULL;
 	}
 	return 0;
@@ -403,14 +396,15 @@ int ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
 {
 	int found = 0;
 
-	if (dev)
-		return ipv6_chk_acast_dev(dev, addr);
 	rcu_read_lock();
-	for_each_netdev_rcu(net, dev)
-		if (ipv6_chk_acast_dev(dev, addr)) {
-			found = 1;
-			break;
-		}
+	if (dev)
+		found = ipv6_chk_acast_dev(dev, addr);
+	else
+		for_each_netdev_rcu(net, dev)
+			if (ipv6_chk_acast_dev(dev, addr)) {
+				found = 1;
+				break;
+			}
 	rcu_read_unlock();
 	return found;
 }
-- 
cgit v1.2.3-58-ga151


From 5bfddbd46a95c978f4d3c992339cbdf4f4b790a3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 8 Jun 2010 16:09:52 +0200
Subject: netfilter: nf_conntrack: IPS_UNTRACKED bit

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet. This is bad for performance.
__read_mostly annotation is also a bad choice.

This patch introduces IPS_UNTRACKED bit so that we can use later a
per_cpu untrack structure more easily.

A new helper, nf_ct_untracked_get() returns a pointer to
nf_conntrack_untracked.

Another one, nf_ct_untracked_status_or() is used by nf_nat_init() to add
IPS_NAT_DONE_MASK bits to untracked status.

nf_ct_is_untracked() prototype is changed to work on a nf_conn pointer.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h  |  4 ++++
 include/net/netfilter/nf_conntrack.h           | 12 +++++++++---
 include/net/netfilter/nf_conntrack_core.h      |  2 +-
 net/ipv4/netfilter/nf_nat_core.c               |  2 +-
 net/ipv4/netfilter/nf_nat_standalone.c         |  2 +-
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |  2 +-
 net/netfilter/nf_conntrack_core.c              | 11 ++++++++---
 net/netfilter/nf_conntrack_netlink.c           |  2 +-
 net/netfilter/xt_CT.c                          |  4 ++--
 net/netfilter/xt_NOTRACK.c                     |  2 +-
 net/netfilter/xt_TEE.c                         |  4 ++--
 net/netfilter/xt_cluster.c                     |  2 +-
 net/netfilter/xt_conntrack.c                   | 11 ++++++-----
 net/netfilter/xt_socket.c                      |  2 +-
 net/netfilter/xt_state.c                       | 14 ++++++++------
 15 files changed, 47 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 14e6d32002c4..1afd18c855ec 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -76,6 +76,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a template */
 	IPS_TEMPLATE_BIT = 11,
 	IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
+
+	/* Conntrack is a fake untracked entry */
+	IPS_UNTRACKED_BIT = 12,
+	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index bde095f7e845..3bc38c70bbbe 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -261,7 +261,13 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
-extern struct nf_conn nf_conntrack_untracked;
+static inline struct nf_conn *nf_ct_untracked_get(void)
+{
+	extern struct nf_conn nf_conntrack_untracked;
+
+	return &nf_conntrack_untracked;
+}
+extern void nf_ct_untracked_status_or(unsigned long bits);
 
 /* Iterate over all conntracks: if iter returns true, it's deleted. */
 extern void
@@ -289,9 +295,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
 	return test_bit(IPS_DYING_BIT, &ct->status);
 }
 
-static inline int nf_ct_is_untracked(const struct sk_buff *skb)
+static inline int nf_ct_is_untracked(const struct nf_conn *ct)
 {
-	return (skb->nfct == &nf_conntrack_untracked.ct_general);
+	return test_bit(IPS_UNTRACKED_BIT, &ct->status);
 }
 
 extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3d7524fba194..aced085132e7 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
 	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
 	int ret = NF_ACCEPT;
 
-	if (ct && ct != &nf_conntrack_untracked) {
+	if (ct && !nf_ct_is_untracked(ct)) {
 		if (!nf_ct_is_confirmed(ct))
 			ret = __nf_conntrack_confirm(skb);
 		if (likely(ret == NF_ACCEPT))
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 4f8bddb760c9..c7719b283ada 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -742,7 +742,7 @@ static int __init nf_nat_init(void)
 	spin_unlock_bh(&nf_nat_lock);
 
 	/* Initialize fake conntrack so that NAT will skip it */
-	nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
+	nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
 
 	l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
 
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index beb25819c9c9..6723c682250d 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
 		return NF_ACCEPT;
 
 	/* Don't try to NAT if this packet is not conntracked */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return NF_ACCEPT;
 
 	nat = nfct_nat(ct);
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 9be81776415e..1df3c8b6bf47 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
 	type = icmp6h->icmp6_type - 130;
 	if (type >= 0 && type < sizeof(noct_valid_new) &&
 	    noct_valid_new[type]) {
-		skb->nfct = &nf_conntrack_untracked.ct_general;
+		skb->nfct = &nf_ct_untracked_get()->ct_general;
 		skb->nfctinfo = IP_CT_NEW;
 		nf_conntrack_get(skb->nfct);
 		return NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index eeeb8bc73982..6c1da212380d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked __read_mostly;
+struct nf_conn nf_conntrack_untracked;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
@@ -1321,6 +1321,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
+void nf_ct_untracked_status_or(unsigned long bits)
+{
+	nf_conntrack_untracked.status |= bits;
+}
+EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
+
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
@@ -1368,8 +1374,7 @@ static int nf_conntrack_init_init_net(void)
 #endif
 	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
 	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
+	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
 
 #ifdef CONFIG_NF_CONNTRACK_ZONES
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c42ff6aa441d..5bae1cd15eea 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
 	int err;
 
 	/* ignore our fake conntrack entry */
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return 0;
 
 	if (events & (1 << IPCT_DESTROY)) {
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 562bf3266e04..0cb6053f02fd 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
-		ct = &nf_conntrack_untracked;
+		ct = nf_ct_untracked_get();
 		atomic_inc(&ct->ct_general.use);
 		goto out;
 	}
@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
 	struct nf_conn *ct = info->ct;
 	struct nf_conn_help *help;
 
-	if (ct != &nf_conntrack_untracked) {
+	if (!nf_ct_is_untracked(ct)) {
 		help = nfct_help(ct);
 		if (help)
 			module_put(help->helper->me);
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index 512b9123252f..9d782181b6c8 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	   If there is a real ct entry correspondig to this packet,
 	   it'll hang aroun till timing out. We don't deal with it
 	   for performance reasons. JK */
-	skb->nfct = &nf_conntrack_untracked.ct_general;
+	skb->nfct = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 859d9fd429c8..7a118267c4c4 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 #ifdef WITH_CONNTRACK
 	/* Avoid counting cloned packets towards the original connection. */
 	nf_conntrack_put(skb->nfct);
-	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfct     = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 #endif
@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 
 #ifdef WITH_CONNTRACK
 	nf_conntrack_put(skb->nfct);
-	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfct     = &nf_ct_untracked_get()->ct_general;
 	skb->nfctinfo = IP_CT_NEW;
 	nf_conntrack_get(skb->nfct);
 #endif
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 30b95a1c1c89..f4af1bfafb1c 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (ct == NULL)
 		return false;
 
-	if (ct == &nf_conntrack_untracked)
+	if (nf_ct_is_untracked(ct))
 		return false;
 
 	if (ct->master)
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 39681f10291c..e536710ad916 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
 
 	ct = nf_ct_get(skb, &ctinfo);
 
-	if (ct == &nf_conntrack_untracked)
-		statebit = XT_CONNTRACK_STATE_UNTRACKED;
-	else if (ct != NULL)
-		statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
-	else
+	if (ct) {
+		if (nf_ct_is_untracked(ct))
+			statebit = XT_CONNTRACK_STATE_UNTRACKED;
+		else
+			statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
+	} else
 		statebit = XT_CONNTRACK_STATE_INVALID;
 
 	if (info->match_flags & XT_CONNTRACK_STATE) {
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 3d54c236a1ba..1ca89908cbad 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	 * reply packet of an established SNAT-ted connection. */
 
 	ct = nf_ct_get(skb, &ctinfo);
-	if (ct && (ct != &nf_conntrack_untracked) &&
+	if (ct && !nf_ct_is_untracked(ct) &&
 	    ((iph->protocol != IPPROTO_ICMP &&
 	      ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
 	     (iph->protocol == IPPROTO_ICMP &&
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index e12e053d3782..a507922d80cd 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
 	unsigned int statebit;
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 
-	if (nf_ct_is_untracked(skb))
-		statebit = XT_STATE_UNTRACKED;
-	else if (!nf_ct_get(skb, &ctinfo))
+	if (!ct)
 		statebit = XT_STATE_INVALID;
-	else
-		statebit = XT_STATE_BIT(ctinfo);
-
+	else {
+		if (nf_ct_is_untracked(ct))
+			statebit = XT_STATE_UNTRACKED;
+		else
+			statebit = XT_STATE_BIT(ctinfo);
+	}
 	return (sinfo->statemask & statebit);
 }
 
-- 
cgit v1.2.3-58-ga151


From b3c5163fe0193a74016dba1bb22491e0d1e9aaa4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 14:43:38 +0200
Subject: netfilter: nf_conntrack: per_cpu untracking

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet, slowing down performance.

This patch converts it to a per_cpu variable.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack.h |  5 ++---
 net/netfilter/nf_conntrack_core.c    | 36 ++++++++++++++++++++++++++----------
 2 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 3bc38c70bbbe..84a4b6fec99d 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -261,11 +261,10 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
 			       u32 seq);
 
 /* Fake conntrack entry for untracked connections */
+DECLARE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 static inline struct nf_conn *nf_ct_untracked_get(void)
 {
-	extern struct nf_conn nf_conntrack_untracked;
-
-	return &nf_conntrack_untracked;
+	return &__raw_get_cpu_var(nf_conntrack_untracked);
 }
 extern void nf_ct_untracked_status_or(unsigned long bits);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6c1da212380d..9c661413b826 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -62,8 +62,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 unsigned int nf_conntrack_max __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_max);
 
-struct nf_conn nf_conntrack_untracked;
-EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
+DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
+EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
@@ -1183,10 +1183,21 @@ static void nf_ct_release_dying_list(struct net *net)
 	spin_unlock_bh(&nf_conntrack_lock);
 }
 
+static int untrack_refs(void)
+{
+	int cnt = 0, cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		cnt += atomic_read(&ct->ct_general.use) - 1;
+	}
+	return cnt;
+}
+
 static void nf_conntrack_cleanup_init_net(void)
 {
-	/* wait until all references to nf_conntrack_untracked are dropped */
-	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+	while (untrack_refs() > 0)
 		schedule();
 
 	nf_conntrack_helper_fini();
@@ -1323,14 +1334,17 @@ module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 
 void nf_ct_untracked_status_or(unsigned long bits)
 {
-	nf_conntrack_untracked.status |= bits;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(nf_conntrack_untracked, cpu).status |= bits;
 }
 EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
 
 static int nf_conntrack_init_init_net(void)
 {
 	int max_factor = 8;
-	int ret;
+	int ret, cpu;
 
 	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
 	 * machine has 512 buckets. >= 1GB machines have 16384 buckets. */
@@ -1369,10 +1383,12 @@ static int nf_conntrack_init_init_net(void)
 		goto err_extend;
 #endif
 	/* Set up fake conntrack: to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
-	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+	for_each_possible_cpu(cpu) {
+		struct nf_conn *ct = &per_cpu(nf_conntrack_untracked, cpu);
+
+		write_pnet(&ct->ct_net, &init_net);
+		atomic_set(&ct->ct_general.use, 1);
+	}
 	/*  - and look it like as a confirmed connection */
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
 	return 0;
-- 
cgit v1.2.3-58-ga151


From 2bf074825403e0e0d623bac7573580773b78abef Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Wed, 9 Jun 2010 14:47:40 +0200
Subject: netfilter: xt_sctp: use WORD_ROUND macro to calculate length of
 multiple of 4 bytes

Use  WORD_ROUND to round an int up to the next multiple of 4.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_sctp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index c04fcf385c59..ef36a56a02c6 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -3,6 +3,7 @@
 #include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/sctp/sctp.h>
 #include <linux/sctp.h>
 
 #include <linux/netfilter/x_tables.h>
@@ -67,7 +68,7 @@ match_packet(const struct sk_buff *skb,
 			 ++i, offset, sch->type, htons(sch->length),
 			 sch->flags);
 #endif
-		offset += (ntohs(sch->length) + 3) & ~3;
+		offset += WORD_ROUND(ntohs(sch->length));
 
 		pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
 
-- 
cgit v1.2.3-58-ga151


From 5756d346c7cdefcd84a8ac4901167cdfb5447b69 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 15:47:41 +0200
Subject: netfilter: ip_queue: rwlock to spinlock conversion

Converts queue_lock rwlock to a spinlock.

(readlocked part can be changed by reads of integer values)

One atomic operation instead of four per ipq_enqueue_packet() call.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_queue.c | 57 +++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index a4e5fc5df4bf..d2c1311cb28d 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -42,7 +42,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
+static DEFINE_SPINLOCK(queue_lock);
 static int peer_pid __read_mostly;
 static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
@@ -72,10 +72,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
 		break;
 
 	case IPQ_COPY_PACKET:
-		copy_mode = mode;
+		if (range > 0xFFFF)
+			range = 0xFFFF;
 		copy_range = range;
-		if (copy_range > 0xFFFF)
-			copy_range = 0xFFFF;
+		copy_mode = mode;
 		break;
 
 	default:
@@ -101,7 +101,7 @@ ipq_find_dequeue_entry(unsigned long id)
 {
 	struct nf_queue_entry *entry = NULL, *i;
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	list_for_each_entry(i, &queue_list, list) {
 		if ((unsigned long)i == id) {
@@ -115,7 +115,7 @@ ipq_find_dequeue_entry(unsigned long id)
 		queue_total--;
 	}
 
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return entry;
 }
 
@@ -136,9 +136,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 static void
 ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 	__ipq_flush(cmpfn, data);
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 }
 
 static struct sk_buff *
@@ -152,9 +152,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	struct nlmsghdr *nlh;
 	struct timeval tv;
 
-	read_lock_bh(&queue_lock);
-
-	switch (copy_mode) {
+	switch (ACCESS_ONCE(copy_mode)) {
 	case IPQ_COPY_META:
 	case IPQ_COPY_NONE:
 		size = NLMSG_SPACE(sizeof(*pmsg));
@@ -162,26 +160,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 
 	case IPQ_COPY_PACKET:
 		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-		    (*errp = skb_checksum_help(entry->skb))) {
-			read_unlock_bh(&queue_lock);
+		    (*errp = skb_checksum_help(entry->skb)))
 			return NULL;
-		}
-		if (copy_range == 0 || copy_range > entry->skb->len)
+
+		data_len = ACCESS_ONCE(copy_range);
+		if (data_len == 0 || data_len > entry->skb->len)
 			data_len = entry->skb->len;
-		else
-			data_len = copy_range;
 
 		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
 		break;
 
 	default:
 		*errp = -EINVAL;
-		read_unlock_bh(&queue_lock);
 		return NULL;
 	}
 
-	read_unlock_bh(&queue_lock);
-
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb)
 		goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	if (nskb == NULL)
 		return status;
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	if (!peer_pid)
 		goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	__ipq_enqueue_entry(entry);
 
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return status;
 
 err_out_free_nskb:
 	kfree_skb(nskb);
 
 err_out_unlock:
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return status;
 }
 
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
 {
 	int status;
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 	status = __ipq_set_mode(mode, range);
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return status;
 }
 
@@ -440,11 +433,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
 	if (security_netlink_recv(skb, CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	if (peer_pid) {
 		if (peer_pid != pid) {
-			write_unlock_bh(&queue_lock);
+			spin_unlock_bh(&queue_lock);
 			RCV_SKB_FAIL(-EBUSY);
 		}
 	} else {
@@ -452,7 +445,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
 		peer_pid = pid;
 	}
 
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 
 	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
 				  nlmsglen - NLMSG_LENGTH(0));
@@ -497,10 +490,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
 	struct netlink_notify *n = ptr;
 
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {
-		write_lock_bh(&queue_lock);
+		spin_lock_bh(&queue_lock);
 		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
 			__ipq_reset();
-		write_unlock_bh(&queue_lock);
+		spin_unlock_bh(&queue_lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -527,7 +520,7 @@ static ctl_table ipq_table[] = {
 #ifdef CONFIG_PROC_FS
 static int ip_queue_show(struct seq_file *m, void *v)
 {
-	read_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	seq_printf(m,
 		      "Peer PID          : %d\n"
@@ -545,7 +538,7 @@ static int ip_queue_show(struct seq_file *m, void *v)
 		      queue_dropped,
 		      queue_user_dropped);
 
-	read_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From aea9d711f3d68c656ad31ab578ecfb0bb5cd7f97 Mon Sep 17 00:00:00 2001
From: Sven Wegener <sven.wegener@stealer.net>
Date: Wed, 9 Jun 2010 16:10:57 +0200
Subject: ipvs: Add missing locking during connection table hashing and
 unhashing

The code that hashes and unhashes connections from the connection table
is missing locking of the connection being modified, which opens up a
race condition and results in memory corruption when this race condition
is hit.

Here is what happens in pretty verbose form:

CPU 0					CPU 1
------------				------------
An active connection is terminated and
we schedule ip_vs_conn_expire() on this
CPU to expire this connection.

					IRQ assignment is changed to this CPU,
					but the expire timer stays scheduled on
					the other CPU.

					New connection from same ip:port comes
					in right before the timer expires, we
					find the inactive connection in our
					connection table and get a reference to
					it. We proper lock the connection in
					tcp_state_transition() and read the
					connection flags in set_tcp_state().

ip_vs_conn_expire() gets called, we
unhash the connection from our
connection table and remove the hashed
flag in ip_vs_conn_unhash(), without
proper locking!

					While still holding proper locks we
					write the connection flags in
					set_tcp_state() and this sets the hashed
					flag again.

ip_vs_conn_expire() fails to expire the
connection, because the other CPU has
incremented the reference count. We try
to re-insert the connection into our
connection table, but this fails in
ip_vs_conn_hash(), because the hashed
flag has been set by the other CPU. We
re-schedule execution of
ip_vs_conn_expire(). Now this connection
has the hashed flag set, but isn't
actually hashed in our connection table
and has a dangling list_head.

					We drop the reference we held on the
					connection and schedule the expire timer
					for timeouting the connection on this
					CPU. Further packets won't be able to
					find this connection in our connection
					table.

					ip_vs_conn_expire() gets called again,
					we think it's already hashed, but the
					list_head is dangling and while removing
					the connection from our connection table
					we write to the memory location where
					this list_head points to.

The result will probably be a kernel oops at some other point in time.

This race condition is pretty subtle, but it can be triggered remotely.
It needs the IRQ assignment change or another circumstance where packets
coming from the same ip:port for the same service are being processed on
different CPUs. And it involves hitting the exact time at which
ip_vs_conn_expire() gets called. It can be avoided by making sure that
all packets from one connection are always processed on the same CPU and
can be made harder to exploit by changing the connection timeouts to
some custom values.

Signed-off-by: Sven Wegener <sven.wegener@stealer.net>
Cc: stable@kernel.org
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_conn.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8ef67b4..ff04e9edbed6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -162,6 +162,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
 	ct_write_lock(hash);
+	spin_lock(&cp->lock);
 
 	if (!(cp->flags & IP_VS_CONN_F_HASHED)) {
 		list_add(&cp->c_list, &ip_vs_conn_tab[hash]);
@@ -174,6 +175,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 		ret = 0;
 	}
 
+	spin_unlock(&cp->lock);
 	ct_write_unlock(hash);
 
 	return ret;
@@ -193,6 +195,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
 	ct_write_lock(hash);
+	spin_lock(&cp->lock);
 
 	if (cp->flags & IP_VS_CONN_F_HASHED) {
 		list_del(&cp->c_list);
@@ -202,6 +205,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
 	} else
 		ret = 0;
 
+	spin_unlock(&cp->lock);
 	ct_write_unlock(hash);
 
 	return ret;
-- 
cgit v1.2.3-58-ga151


From 144ad2a6c56b6109ff0f64074863ae5cf1c1698a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 16:25:08 +0200
Subject: netfilter: ip6_queue: rwlock to spinlock conversion

Converts queue_lock rwlock to a spinlock.

(readlocked part can be changed by reads of integer values)

One atomic operation instead of four per ipq_enqueue_packet() call.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6_queue.c | 57 ++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 8c201743d96d..413ab0754e1f 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -43,7 +43,7 @@ typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
 
 static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
 static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
+static DEFINE_SPINLOCK(queue_lock);
 static int peer_pid __read_mostly;
 static unsigned int copy_range __read_mostly;
 static unsigned int queue_total;
@@ -73,10 +73,10 @@ __ipq_set_mode(unsigned char mode, unsigned int range)
 		break;
 
 	case IPQ_COPY_PACKET:
-		copy_mode = mode;
+		if (range > 0xFFFF)
+			range = 0xFFFF;
 		copy_range = range;
-		if (copy_range > 0xFFFF)
-			copy_range = 0xFFFF;
+		copy_mode = mode;
 		break;
 
 	default:
@@ -102,7 +102,7 @@ ipq_find_dequeue_entry(unsigned long id)
 {
 	struct nf_queue_entry *entry = NULL, *i;
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	list_for_each_entry(i, &queue_list, list) {
 		if ((unsigned long)i == id) {
@@ -116,7 +116,7 @@ ipq_find_dequeue_entry(unsigned long id)
 		queue_total--;
 	}
 
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return entry;
 }
 
@@ -137,9 +137,9 @@ __ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 static void
 ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
 {
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 	__ipq_flush(cmpfn, data);
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 }
 
 static struct sk_buff *
@@ -153,9 +153,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 	struct nlmsghdr *nlh;
 	struct timeval tv;
 
-	read_lock_bh(&queue_lock);
-
-	switch (copy_mode) {
+	switch (ACCESS_ONCE(copy_mode)) {
 	case IPQ_COPY_META:
 	case IPQ_COPY_NONE:
 		size = NLMSG_SPACE(sizeof(*pmsg));
@@ -163,26 +161,21 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 
 	case IPQ_COPY_PACKET:
 		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
-		    (*errp = skb_checksum_help(entry->skb))) {
-			read_unlock_bh(&queue_lock);
+		    (*errp = skb_checksum_help(entry->skb)))
 			return NULL;
-		}
-		if (copy_range == 0 || copy_range > entry->skb->len)
+
+		data_len = ACCESS_ONCE(copy_range);
+		if (data_len == 0 || data_len > entry->skb->len)
 			data_len = entry->skb->len;
-		else
-			data_len = copy_range;
 
 		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
 		break;
 
 	default:
 		*errp = -EINVAL;
-		read_unlock_bh(&queue_lock);
 		return NULL;
 	}
 
-	read_unlock_bh(&queue_lock);
-
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb)
 		goto nlmsg_failure;
@@ -242,7 +235,7 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	if (nskb == NULL)
 		return status;
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	if (!peer_pid)
 		goto err_out_free_nskb;
@@ -266,14 +259,14 @@ ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 
 	__ipq_enqueue_entry(entry);
 
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return status;
 
 err_out_free_nskb:
 	kfree_skb(nskb);
 
 err_out_unlock:
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return status;
 }
 
@@ -342,9 +335,9 @@ ipq_set_mode(unsigned char mode, unsigned int range)
 {
 	int status;
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 	status = __ipq_set_mode(mode, range);
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return status;
 }
 
@@ -441,11 +434,11 @@ __ipq_rcv_skb(struct sk_buff *skb)
 	if (security_netlink_recv(skb, CAP_NET_ADMIN))
 		RCV_SKB_FAIL(-EPERM);
 
-	write_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	if (peer_pid) {
 		if (peer_pid != pid) {
-			write_unlock_bh(&queue_lock);
+			spin_unlock_bh(&queue_lock);
 			RCV_SKB_FAIL(-EBUSY);
 		}
 	} else {
@@ -453,7 +446,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
 		peer_pid = pid;
 	}
 
-	write_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 
 	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
 				  nlmsglen - NLMSG_LENGTH(0));
@@ -498,10 +491,10 @@ ipq_rcv_nl_event(struct notifier_block *this,
 	struct netlink_notify *n = ptr;
 
 	if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {
-		write_lock_bh(&queue_lock);
+		spin_lock_bh(&queue_lock);
 		if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))
 			__ipq_reset();
-		write_unlock_bh(&queue_lock);
+		spin_unlock_bh(&queue_lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -528,7 +521,7 @@ static ctl_table ipq_table[] = {
 #ifdef CONFIG_PROC_FS
 static int ip6_queue_show(struct seq_file *m, void *v)
 {
-	read_lock_bh(&queue_lock);
+	spin_lock_bh(&queue_lock);
 
 	seq_printf(m,
 		      "Peer PID          : %d\n"
@@ -546,7 +539,7 @@ static int ip6_queue_show(struct seq_file *m, void *v)
 		      queue_dropped,
 		      queue_user_dropped);
 
-	read_unlock_bh(&queue_lock);
+	spin_unlock_bh(&queue_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From c463ac972315a0c86bb20b8d35225baa75caf899 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 18:07:06 +0200
Subject: netfilter: nfnetlink_queue: some optimizations

- Use an atomic_t for id_sequence to avoid a spin_lock/spin_unlock pair

- Group highly modified struct nfqnl_instance fields together

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_queue.c | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 12e1ab37fcd8..d05605b38f6f 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -46,17 +46,19 @@ struct nfqnl_instance {
 	int peer_pid;
 	unsigned int queue_maxlen;
 	unsigned int copy_range;
-	unsigned int queue_total;
 	unsigned int queue_dropped;
 	unsigned int queue_user_dropped;
 
-	unsigned int id_sequence;		/* 'sequence' of pkt ids */
 
 	u_int16_t queue_num;			/* number of this queue */
 	u_int8_t copy_mode;
-
-	spinlock_t lock;
-
+/*
+ * Following fields are dirtied for each queued packet,
+ * keep them in same cache line if possible.
+ */
+	spinlock_t	lock;
+	unsigned int	queue_total;
+	atomic_t	id_sequence;		/* 'sequence' of pkt ids */
 	struct list_head queue_list;		/* packets in queue */
 };
 
@@ -238,32 +240,24 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
 	outdev = entry->outdev;
 
-	spin_lock_bh(&queue->lock);
-
-	switch ((enum nfqnl_config_mode)queue->copy_mode) {
+	switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) {
 	case NFQNL_COPY_META:
 	case NFQNL_COPY_NONE:
 		break;
 
 	case NFQNL_COPY_PACKET:
 		if (entskb->ip_summed == CHECKSUM_PARTIAL &&
-		    skb_checksum_help(entskb)) {
-			spin_unlock_bh(&queue->lock);
+		    skb_checksum_help(entskb))
 			return NULL;
-		}
-		if (queue->copy_range == 0
-		    || queue->copy_range > entskb->len)
+
+		data_len = ACCESS_ONCE(queue->copy_range);
+		if (data_len == 0 || data_len > entskb->len)
 			data_len = entskb->len;
-		else
-			data_len = queue->copy_range;
 
 		size += nla_total_size(data_len);
 		break;
 	}
 
-	entry->id = queue->id_sequence++;
-
-	spin_unlock_bh(&queue->lock);
 
 	skb = alloc_skb(size, GFP_ATOMIC);
 	if (!skb)
@@ -278,6 +272,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(queue->queue_num);
 
+	entry->id = atomic_inc_return(&queue->id_sequence);
 	pmsg.packet_id 		= htonl(entry->id);
 	pmsg.hw_protocol	= entskb->protocol;
 	pmsg.hook		= entry->hook;
@@ -866,7 +861,7 @@ static int seq_show(struct seq_file *s, void *v)
 			  inst->peer_pid, inst->queue_total,
 			  inst->copy_mode, inst->copy_range,
 			  inst->queue_dropped, inst->queue_user_dropped,
-			  inst->id_sequence, 1);
+			  atomic_read(&inst->id_sequence), 1);
 }
 
 static const struct seq_operations nfqnl_seq_ops = {
-- 
cgit v1.2.3-58-ga151


From bed1be20867d17a3eb2fb5e1613ebdc50c83b8aa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 18:14:58 +0200
Subject: netfilter: nfnetlink_log: RCU conversion

- instances_lock becomes a spinlock
- lockless lookups

While nfnetlink_log probably not performance critical, using less
rwlocks in our code is always welcomed...

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_log.c | 49 ++++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fc9a211e629e..8ec23ec568e7 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -66,9 +66,10 @@ struct nfulnl_instance {
 	u_int16_t group_num;		/* number of this queue */
 	u_int16_t flags;
 	u_int8_t copy_mode;
+	struct rcu_head rcu;
 };
 
-static DEFINE_RWLOCK(instances_lock);
+static DEFINE_SPINLOCK(instances_lock);
 static atomic_t global_seq;
 
 #define INSTANCE_BUCKETS	16
@@ -88,7 +89,7 @@ __instance_lookup(u_int16_t group_num)
 	struct nfulnl_instance *inst;
 
 	head = &instance_table[instance_hashfn(group_num)];
-	hlist_for_each_entry(inst, pos, head, hlist) {
+	hlist_for_each_entry_rcu(inst, pos, head, hlist) {
 		if (inst->group_num == group_num)
 			return inst;
 	}
@@ -106,22 +107,26 @@ instance_lookup_get(u_int16_t group_num)
 {
 	struct nfulnl_instance *inst;
 
-	read_lock_bh(&instances_lock);
+	rcu_read_lock_bh();
 	inst = __instance_lookup(group_num);
 	if (inst)
 		instance_get(inst);
-	read_unlock_bh(&instances_lock);
+	rcu_read_unlock_bh();
 
 	return inst;
 }
 
+static void nfulnl_instance_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct nfulnl_instance, rcu));
+	module_put(THIS_MODULE);
+}
+
 static void
 instance_put(struct nfulnl_instance *inst)
 {
-	if (inst && atomic_dec_and_test(&inst->use)) {
-		kfree(inst);
-		module_put(THIS_MODULE);
-	}
+	if (inst && atomic_dec_and_test(&inst->use))
+		call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
 }
 
 static void nfulnl_timer(unsigned long data);
@@ -132,7 +137,7 @@ instance_create(u_int16_t group_num, int pid)
 	struct nfulnl_instance *inst;
 	int err;
 
-	write_lock_bh(&instances_lock);
+	spin_lock_bh(&instances_lock);
 	if (__instance_lookup(group_num)) {
 		err = -EEXIST;
 		goto out_unlock;
@@ -169,12 +174,12 @@ instance_create(u_int16_t group_num, int pid)
 	hlist_add_head(&inst->hlist,
 		       &instance_table[instance_hashfn(group_num)]);
 
-	write_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances_lock);
 
 	return inst;
 
 out_unlock:
-	write_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances_lock);
 	return ERR_PTR(err);
 }
 
@@ -200,9 +205,9 @@ __instance_destroy(struct nfulnl_instance *inst)
 static inline void
 instance_destroy(struct nfulnl_instance *inst)
 {
-	write_lock_bh(&instances_lock);
+	spin_lock_bh(&instances_lock);
 	__instance_destroy(inst);
-	write_unlock_bh(&instances_lock);
+	spin_unlock_bh(&instances_lock);
 }
 
 static int
@@ -672,7 +677,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 		int i;
 
 		/* destroy all instances for this pid */
-		write_lock_bh(&instances_lock);
+		spin_lock_bh(&instances_lock);
 		for  (i = 0; i < INSTANCE_BUCKETS; i++) {
 			struct hlist_node *tmp, *t2;
 			struct nfulnl_instance *inst;
@@ -684,7 +689,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this,
 					__instance_destroy(inst);
 			}
 		}
-		write_unlock_bh(&instances_lock);
+		spin_unlock_bh(&instances_lock);
 	}
 	return NOTIFY_DONE;
 }
@@ -861,19 +866,19 @@ static struct hlist_node *get_first(struct iter_state *st)
 
 	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
 		if (!hlist_empty(&instance_table[st->bucket]))
-			return instance_table[st->bucket].first;
+			return rcu_dereference_bh(instance_table[st->bucket].first);
 	}
 	return NULL;
 }
 
 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h)
 {
-	h = h->next;
+	h = rcu_dereference_bh(h->next);
 	while (!h) {
 		if (++st->bucket >= INSTANCE_BUCKETS)
 			return NULL;
 
-		h = instance_table[st->bucket].first;
+		h = rcu_dereference_bh(instance_table[st->bucket].first);
 	}
 	return h;
 }
@@ -890,9 +895,9 @@ static struct hlist_node *get_idx(struct iter_state *st, loff_t pos)
 }
 
 static void *seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(instances_lock)
+	__acquires(rcu_bh)
 {
-	read_lock_bh(&instances_lock);
+	rcu_read_lock_bh();
 	return get_idx(seq->private, *pos);
 }
 
@@ -903,9 +908,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
 }
 
 static void seq_stop(struct seq_file *s, void *v)
-	__releases(instances_lock)
+	__releases(rcu_bh)
 {
-	read_unlock_bh(&instances_lock);
+	rcu_read_unlock_bh();
 }
 
 static int seq_show(struct seq_file *s, void *v)
-- 
cgit v1.2.3-58-ga151


From 08c801f8d45387a1b46066aad1789a9bb9c4b645 Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Tue, 8 Jun 2010 17:51:27 -0600
Subject: net: Print num_rx_queues imbalance warning only when there are
 allocated queues

BugLink: http://bugs.launchpad.net/bugs/591416

There are a number of network drivers (bridge, bonding, etc) that are not yet
receive multi-queue enabled and use alloc_netdev(), so don't print a
num_rx_queues imbalance warning in that case.

Also, only print the warning once for those drivers that _are_ multi-queue
enabled.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/core/dev.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d03470f5260a..14a85682af38 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2253,11 +2253,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->num_rx_queues)) {
-			if (net_ratelimit()) {
-				pr_warning("%s received packet on queue "
-					"%u, but number of RX queues is %u\n",
-					dev->name, index, dev->num_rx_queues);
-			}
+			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+				"on queue %u, but number of RX queues is %u\n",
+				dev->name, index, dev->num_rx_queues);
 			goto done;
 		}
 		rxqueue = dev->_rx + index;
-- 
cgit v1.2.3-58-ga151


From 88e7594a9775e54dcd421cb246406dce62e48bee Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 7 Jun 2010 03:27:39 +0000
Subject: phonet: use call_rcu for phonet device free

Use call_rcu rather than synchronize_rcu.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pn_dev.h |  1 +
 net/phonet/pn_dev.c         | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h
index d7b989ca3d63..2d16783d5e20 100644
--- a/include/net/phonet/pn_dev.h
+++ b/include/net/phonet/pn_dev.h
@@ -34,6 +34,7 @@ struct phonet_device {
 	struct list_head list;
 	struct net_device *netdev;
 	DECLARE_BITMAP(addrs, 64);
+	struct rcu_head	rcu;
 };
 
 int phonet_device_init(void);
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index c33da6576942..b18e48fae975 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -162,6 +162,14 @@ int phonet_address_add(struct net_device *dev, u8 addr)
 	return err;
 }
 
+static void phonet_device_rcu_free(struct rcu_head *head)
+{
+	struct phonet_device *pnd;
+
+	pnd = container_of(head, struct phonet_device, rcu);
+	kfree(pnd);
+}
+
 int phonet_address_del(struct net_device *dev, u8 addr)
 {
 	struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
@@ -179,10 +187,9 @@ int phonet_address_del(struct net_device *dev, u8 addr)
 		pnd = NULL;
 	mutex_unlock(&pndevs->lock);
 
-	if (pnd) {
-		synchronize_rcu();
-		kfree(pnd);
-	}
+	if (pnd)
+		call_rcu(&pnd->rcu, phonet_device_rcu_free);
+
 	return err;
 }
 
-- 
cgit v1.2.3-58-ga151


From aea34e7ae7a40bc72f9f11b5658160dfb4b90c48 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 7 Jun 2010 04:51:58 +0000
Subject: caif: fix a couple range checks

The extra ! character means that these conditions are always false.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfrfml.c | 2 +-
 net/caif/cfveil.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index cd2830fec935..fd27b172fb5d 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -83,7 +83,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
-	if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
+	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
 		pr_err("CAIF: %s():Packet too large - size=%d\n",
 			__func__, cfpkt_getlen(pkt));
 		return -EOVERFLOW;
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 0fd827f49491..e04f7d964e83 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -84,7 +84,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
 		return ret;
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
-	if (!cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
+	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
 		pr_warning("CAIF: %s(): Packet too large - size=%d\n",
 			   __func__, cfpkt_getlen(pkt));
 		return -EOVERFLOW;
-- 
cgit v1.2.3-58-ga151


From cfa087f689402438e3cb0f077e649d01c871b0e7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 22:34:35 +0000
Subject: icmp: RCU conversion in icmp_address_reply()

- rcu_read_lock() already held by caller
- use __in_dev_get_rcu() instead of in_dev_get() / in_dev_put()
- remove goto out;

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/icmp.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index d65e9215bcd7..bdb6c71e72a6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -925,6 +925,7 @@ static void icmp_address(struct sk_buff *skb)
 /*
  * RFC1812 (4.3.3.9).	A router SHOULD listen all replies, and complain
  *			loudly if an inconsistency is found.
+ * called with rcu_read_lock()
  */
 
 static void icmp_address_reply(struct sk_buff *skb)
@@ -935,12 +936,12 @@ static void icmp_address_reply(struct sk_buff *skb)
 	struct in_ifaddr *ifa;
 
 	if (skb->len < 4 || !(rt->rt_flags&RTCF_DIRECTSRC))
-		goto out;
+		return;
 
-	in_dev = in_dev_get(dev);
+	in_dev = __in_dev_get_rcu(dev);
 	if (!in_dev)
-		goto out;
-	rcu_read_lock();
+		return;
+
 	if (in_dev->ifa_list &&
 	    IN_DEV_LOG_MARTIANS(in_dev) &&
 	    IN_DEV_FORWARD(in_dev)) {
@@ -958,9 +959,6 @@ static void icmp_address_reply(struct sk_buff *skb)
 			       mp, dev->name, &rt->rt_src);
 		}
 	}
-	rcu_read_unlock();
-	in_dev_put(in_dev);
-out:;
 }
 
 static void icmp_discard(struct sk_buff *skb)
-- 
cgit v1.2.3-58-ga151


From 96b52e61be1ad4d4f8de39b9deaf253da804ea3b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 21:05:02 +0000
Subject: ipv6: mcast: RCU conversions

- ipv6_sock_mc_join() : doesnt touch dev refcount

- ipv6_sock_mc_drop() : doesnt touch dev/idev refcounts

- ip6_mc_find_dev() becomes ip6_mc_find_dev_rcu() (called from rcu),
                    and doesnt touch dev/idev refcounts

- ipv6_sock_mc_close() : doesnt touch dev/idev refcounts

- ip6_mc_source() uses ip6_mc_find_dev_rcu()

- ip6_mc_msfilter() uses ip6_mc_find_dev_rcu()

- ip6_mc_msfget() uses ip6_mc_find_dev_rcu()

- ipv6_dev_mc_dec(), ipv6_chk_mcast_addr(),
  igmp6_event_query(), igmp6_event_report(),
  mld_sendpack(), igmp6_send() dont touch idev refcount

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mcast.c | 183 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 87 insertions(+), 96 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 8752e8084806..3e36d1538b6e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -152,18 +152,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	mc_lst->next = NULL;
 	ipv6_addr_copy(&mc_lst->addr, addr);
 
+	rcu_read_lock();
 	if (ifindex == 0) {
 		struct rt6_info *rt;
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dev_hold(dev);
 			dst_release(&rt->u.dst);
 		}
 	} else
-		dev = dev_get_by_index(net, ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (dev == NULL) {
+		rcu_read_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 		return -ENODEV;
 	}
@@ -180,8 +181,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	err = ipv6_dev_mc_inc(dev, addr);
 
 	if (err) {
+		rcu_read_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
-		dev_put(dev);
 		return err;
 	}
 
@@ -190,7 +191,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	np->ipv6_mc_list = mc_lst;
 	write_unlock_bh(&ipv6_sk_mc_lock);
 
-	dev_put(dev);
+	rcu_read_unlock();
 
 	return 0;
 }
@@ -213,18 +214,17 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 			*lnk = mc_lst->next;
 			write_unlock_bh(&ipv6_sk_mc_lock);
 
-			dev = dev_get_by_index(net, mc_lst->ifindex);
+			rcu_read_lock();
+			dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
 			if (dev != NULL) {
-				struct inet6_dev *idev = in6_dev_get(dev);
+				struct inet6_dev *idev = __in6_dev_get(dev);
 
 				(void) ip6_mc_leave_src(sk, mc_lst, idev);
-				if (idev) {
+				if (idev)
 					__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-					in6_dev_put(idev);
-				}
-				dev_put(dev);
 			} else
 				(void) ip6_mc_leave_src(sk, mc_lst, NULL);
+			rcu_read_unlock();
 			sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 			return 0;
 		}
@@ -234,43 +234,36 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	return -EADDRNOTAVAIL;
 }
 
-static struct inet6_dev *ip6_mc_find_dev(struct net *net,
-					 struct in6_addr *group,
-					 int ifindex)
+/* called with rcu_read_lock() */
+static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
+					     struct in6_addr *group,
+					     int ifindex)
 {
 	struct net_device *dev = NULL;
 	struct inet6_dev *idev = NULL;
 
 	if (ifindex == 0) {
-		struct rt6_info *rt;
+		struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
 
-		rt = rt6_lookup(net, group, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
 			dev_hold(dev);
 			dst_release(&rt->u.dst);
 		}
 	} else
-		dev = dev_get_by_index(net, ifindex);
+		dev = dev_get_by_index_rcu(net, ifindex);
 
 	if (!dev)
-		goto nodev;
-	idev = in6_dev_get(dev);
+		return NULL;
+	idev = __in6_dev_get(dev);
 	if (!idev)
-		goto release;
+		return NULL;;
 	read_lock_bh(&idev->lock);
-	if (idev->dead)
-		goto unlock_release;
-
+	if (idev->dead) {
+		read_unlock_bh(&idev->lock);
+		return NULL;
+	}
 	return idev;
-
-unlock_release:
-	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-release:
-	dev_put(dev);
-nodev:
-	return NULL;
 }
 
 void ipv6_sock_mc_close(struct sock *sk)
@@ -286,19 +279,17 @@ void ipv6_sock_mc_close(struct sock *sk)
 		np->ipv6_mc_list = mc_lst->next;
 		write_unlock_bh(&ipv6_sk_mc_lock);
 
-		dev = dev_get_by_index(net, mc_lst->ifindex);
+		rcu_read_lock();
+		dev = dev_get_by_index_rcu(net, mc_lst->ifindex);
 		if (dev) {
-			struct inet6_dev *idev = in6_dev_get(dev);
+			struct inet6_dev *idev = __in6_dev_get(dev);
 
 			(void) ip6_mc_leave_src(sk, mc_lst, idev);
-			if (idev) {
+			if (idev)
 				__ipv6_dev_mc_dec(idev, &mc_lst->addr);
-				in6_dev_put(idev);
-			}
-			dev_put(dev);
 		} else
 			(void) ip6_mc_leave_src(sk, mc_lst, NULL);
-
+		rcu_read_unlock();
 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
 
 		write_lock_bh(&ipv6_sk_mc_lock);
@@ -327,14 +318,17 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface);
-	if (!idev)
+	rcu_read_lock();
+	idev = ip6_mc_find_dev_rcu(net, group, pgsr->gsr_interface);
+	if (!idev) {
+		rcu_read_unlock();
 		return -ENODEV;
+	}
 	dev = idev->dev;
 
 	err = -EADDRNOTAVAIL;
 
-	read_lock_bh(&ipv6_sk_mc_lock);
+	read_lock(&ipv6_sk_mc_lock);
 	for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
 		if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
 			continue;
@@ -358,7 +352,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		pmc->sfmode = omode;
 	}
 
-	write_lock_bh(&pmc->sflock);
+	write_lock(&pmc->sflock);
 	pmclocked = 1;
 
 	psl = pmc->sflist;
@@ -433,11 +427,10 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 	ip6_mc_add_src(idev, group, omode, 1, source, 1);
 done:
 	if (pmclocked)
-		write_unlock_bh(&pmc->sflock);
-	read_unlock_bh(&ipv6_sk_mc_lock);
+		write_unlock(&pmc->sflock);
+	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 	if (leavegroup)
 		return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
 	return err;
@@ -463,14 +456,17 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 	    gsf->gf_fmode != MCAST_EXCLUDE)
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev(net, group, gsf->gf_interface);
+	rcu_read_lock();
+	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
 
-	if (!idev)
+	if (!idev) {
+		rcu_read_unlock();
 		return -ENODEV;
+	}
 	dev = idev->dev;
 
 	err = 0;
-	read_lock_bh(&ipv6_sk_mc_lock);
+	read_lock(&ipv6_sk_mc_lock);
 
 	if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
 		leavegroup = 1;
@@ -512,7 +508,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		(void) ip6_mc_add_src(idev, group, gsf->gf_fmode, 0, NULL, 0);
 	}
 
-	write_lock_bh(&pmc->sflock);
+	write_lock(&pmc->sflock);
 	psl = pmc->sflist;
 	if (psl) {
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode,
@@ -522,13 +518,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 		(void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
 	pmc->sflist = newpsl;
 	pmc->sfmode = gsf->gf_fmode;
-	write_unlock_bh(&pmc->sflock);
+	write_unlock(&pmc->sflock);
 	err = 0;
 done:
-	read_unlock_bh(&ipv6_sk_mc_lock);
+	read_unlock(&ipv6_sk_mc_lock);
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 	if (leavegroup)
 		err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
 	return err;
@@ -551,11 +546,13 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv6_addr_is_multicast(group))
 		return -EINVAL;
 
-	idev = ip6_mc_find_dev(net, group, gsf->gf_interface);
+	rcu_read_lock();
+	idev = ip6_mc_find_dev_rcu(net, group, gsf->gf_interface);
 
-	if (!idev)
+	if (!idev) {
+		rcu_read_unlock();
 		return -ENODEV;
-
+	}
 	dev = idev->dev;
 
 	err = -EADDRNOTAVAIL;
@@ -577,8 +574,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	psl = pmc->sflist;
 	count = psl ? psl->sl_count : 0;
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
@@ -604,8 +600,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 	return 0;
 done:
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
-	dev_put(dev);
+	rcu_read_unlock();
 	return err;
 }
 
@@ -822,6 +817,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 	struct ifmcaddr6 *mc;
 	struct inet6_dev *idev;
 
+	/* we need to take a reference on idev */
 	idev = in6_dev_get(dev);
 
 	if (idev == NULL)
@@ -860,7 +856,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 	setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
 
 	ipv6_addr_copy(&mc->mca_addr, addr);
-	mc->idev = idev;
+	mc->idev = idev; /* (reference taken) */
 	mc->mca_users = 1;
 	/* mca_stamp should be updated upon changes */
 	mc->mca_cstamp = mc->mca_tstamp = jiffies;
@@ -915,16 +911,18 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 
 int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
 {
-	struct inet6_dev *idev = in6_dev_get(dev);
+	struct inet6_dev *idev;
 	int err;
 
-	if (!idev)
-		return -ENODEV;
-
-	err = __ipv6_dev_mc_dec(idev, addr);
+	rcu_read_lock();
 
-	in6_dev_put(idev);
+	idev = __in6_dev_get(dev);
+	if (!idev)
+		err = -ENODEV;
+	else
+		err = __ipv6_dev_mc_dec(idev, addr);
 
+	rcu_read_unlock();
 	return err;
 }
 
@@ -965,7 +963,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 	struct ifmcaddr6 *mc;
 	int rv = 0;
 
-	idev = in6_dev_get(dev);
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
 		for (mc = idev->mc_list; mc; mc=mc->next) {
@@ -992,8 +991,8 @@ int ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
 				rv = 1; /* don't filter unspecified source */
 		}
 		read_unlock_bh(&idev->lock);
-		in6_dev_put(idev);
 	}
+	rcu_read_unlock();
 	return rv;
 }
 
@@ -1104,6 +1103,7 @@ static int mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
 	return 1;
 }
 
+/* called with rcu_read_lock() */
 int igmp6_event_query(struct sk_buff *skb)
 {
 	struct mld2_query *mlh2 = NULL;
@@ -1127,7 +1127,7 @@ int igmp6_event_query(struct sk_buff *skb)
 	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
-	idev = in6_dev_get(skb->dev);
+	idev = __in6_dev_get(skb->dev);
 
 	if (idev == NULL)
 		return 0;
@@ -1137,10 +1137,8 @@ int igmp6_event_query(struct sk_buff *skb)
 	group_type = ipv6_addr_type(group);
 
 	if (group_type != IPV6_ADDR_ANY &&
-	    !(group_type&IPV6_ADDR_MULTICAST)) {
-		in6_dev_put(idev);
+	    !(group_type&IPV6_ADDR_MULTICAST))
 		return -EINVAL;
-	}
 
 	if (len == 24) {
 		int switchback;
@@ -1161,10 +1159,9 @@ int igmp6_event_query(struct sk_buff *skb)
 	} else if (len >= 28) {
 		int srcs_offset = sizeof(struct mld2_query) -
 				  sizeof(struct icmp6hdr);
-		if (!pskb_may_pull(skb, srcs_offset)) {
-			in6_dev_put(idev);
+		if (!pskb_may_pull(skb, srcs_offset))
 			return -EINVAL;
-		}
+
 		mlh2 = (struct mld2_query *)skb_transport_header(skb);
 		max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
 		if (!max_delay)
@@ -1173,28 +1170,23 @@ int igmp6_event_query(struct sk_buff *skb)
 		if (mlh2->mld2q_qrv)
 			idev->mc_qrv = mlh2->mld2q_qrv;
 		if (group_type == IPV6_ADDR_ANY) { /* general query */
-			if (mlh2->mld2q_nsrcs) {
-				in6_dev_put(idev);
+			if (mlh2->mld2q_nsrcs)
 				return -EINVAL; /* no sources allowed */
-			}
+
 			mld_gq_start_timer(idev);
-			in6_dev_put(idev);
 			return 0;
 		}
 		/* mark sources to include, if group & source-specific */
 		if (mlh2->mld2q_nsrcs != 0) {
 			if (!pskb_may_pull(skb, srcs_offset +
-			    ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) {
-				in6_dev_put(idev);
+			    ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr)))
 				return -EINVAL;
-			}
+
 			mlh2 = (struct mld2_query *)skb_transport_header(skb);
 			mark = 1;
 		}
-	} else {
-		in6_dev_put(idev);
+	} else
 		return -EINVAL;
-	}
 
 	read_lock_bh(&idev->lock);
 	if (group_type == IPV6_ADDR_ANY) {
@@ -1227,12 +1219,11 @@ int igmp6_event_query(struct sk_buff *skb)
 		}
 	}
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
 
 	return 0;
 }
 
-
+/* called with rcu_read_lock() */
 int igmp6_event_report(struct sk_buff *skb)
 {
 	struct ifmcaddr6 *ma;
@@ -1260,7 +1251,7 @@ int igmp6_event_report(struct sk_buff *skb)
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
-	idev = in6_dev_get(skb->dev);
+	idev = __in6_dev_get(skb->dev);
 	if (idev == NULL)
 		return -ENODEV;
 
@@ -1280,7 +1271,6 @@ int igmp6_event_report(struct sk_buff *skb)
 		}
 	}
 	read_unlock_bh(&idev->lock);
-	in6_dev_put(idev);
 	return 0;
 }
 
@@ -1396,12 +1386,14 @@ static void mld_sendpack(struct sk_buff *skb)
 	struct mld2_report *pmr =
 			      (struct mld2_report *)skb_transport_header(skb);
 	int payload_len, mldlen;
-	struct inet6_dev *idev = in6_dev_get(skb->dev);
+	struct inet6_dev *idev;
 	struct net *net = dev_net(skb->dev);
 	int err;
 	struct flowi fl;
 	struct dst_entry *dst;
 
+	rcu_read_lock();
+	idev = __in6_dev_get(skb->dev);
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
 	payload_len = (skb->tail - skb->network_header) - sizeof(*pip6);
@@ -1441,8 +1433,7 @@ out:
 	} else
 		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	rcu_read_unlock();
 	return;
 
 err_out:
@@ -1779,7 +1770,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 					 IPPROTO_ICMPV6,
 					 csum_partial(hdr, len, 0));
 
-	idev = in6_dev_get(skb->dev);
+	rcu_read_lock();
+	idev = __in6_dev_get(skb->dev);
 
 	dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
 	if (!dst) {
@@ -1806,8 +1798,7 @@ out:
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 
-	if (likely(idev != NULL))
-		in6_dev_put(idev);
+	rcu_read_unlock();
 	return;
 
 err_out:
-- 
cgit v1.2.3-58-ga151


From 00d9d6a185de89edc0649ca4ead58f0283dfcbac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 7 Jun 2010 22:24:44 +0000
Subject: ipv6: fix ICMP6_MIB_OUTERRORS

In commit 1f8438a85366 (icmp: Account for ICMP out errors), I did a typo
on IPV6 side, using ICMP6_MIB_OUTMSGS instead of ICMP6_MIB_OUTERRORS

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/icmp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index ce7992982557..03e62f94ff8e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -483,7 +483,7 @@ route_done:
 			      np->tclass, NULL, &fl, (struct rt6_info*)dst,
 			      MSG_DONTWAIT, np->dontfrag);
 	if (err) {
-		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
@@ -565,7 +565,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 				np->dontfrag);
 
 	if (err) {
-		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
-- 
cgit v1.2.3-58-ga151


From 597a264b1a9c7e36d1728f677c66c5c1f7e3b837 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 3 Jun 2010 09:30:11 +0000
Subject: net: deliver skbs on inactive slaves to exact matches

Currently, the accelerated receive path for VLAN's will
drop packets if the real device is an inactive slave and
is not one of the special pkts tested for in
skb_bond_should_drop().  This behavior is different then
the non-accelerated path and for pkts over a bonded vlan.

For example,

vlanx -> bond0 -> ethx

will be dropped in the vlan path and not delivered to any
packet handlers at all.  However,

bond0 -> vlanx -> ethx

and

bond0 -> ethx

will be delivered to handlers that match the exact dev,
because the VLAN path checks the real_dev which is not a
slave and netif_recv_skb() doesn't drop frames but only
delivers them to exact matches.

This patch adds a sk_buff flag which is used for tagging
skbs that would previously been dropped and allows the
skb to continue to skb_netif_recv().  Here we add
logic to check for the deliver_no_wcard flag and if it
is set only deliver to handlers that match exactly.  This
makes both paths above consistent and gives pkt handlers
a way to identify skbs that come from inactive slaves.
Without this patch in some configurations skbs will be
delivered to handlers with exact matches and in others
be dropped out right in the vlan path.

I have tested the following 4 configurations in failover modes
and load balancing modes.

# bond0 -> ethx

# vlanx -> bond0 -> ethx

# bond0 -> vlanx -> ethx

# bond0 -> ethx
            |
  vlanx -> --

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  5 ++++-
 net/8021q/vlan_core.c  |  4 ++--
 net/core/dev.c         | 17 ++++++++++++++---
 3 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index bf243fc54959..f89e7fd59a4c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -380,7 +380,10 @@ struct sk_buff {
 	kmemcheck_bitfield_begin(flags2);
 	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
-	__u8			ndisc_nodetype:2;
+	__u8			ndisc_nodetype:2,
+				deliver_no_wcard:1;
+#else
+	__u8			deliver_no_wcard:1;
 #endif
 	kmemcheck_bitfield_end(flags2);
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index bd537fc10254..50f58f5f1c34 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -12,7 +12,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		return NET_RX_DROP;
 
 	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		goto drop;
+		skb->deliver_no_wcard = 1;
 
 	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
@@ -84,7 +84,7 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 	struct sk_buff *p;
 
 	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		goto drop;
+		skb->deliver_no_wcard = 1;
 
 	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
diff --git a/net/core/dev.c b/net/core/dev.c
index 14a85682af38..2b3bf53bc687 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2810,13 +2810,24 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	if (!skb->skb_iif)
 		skb->skb_iif = skb->dev->ifindex;
 
+	/*
+	 * bonding note: skbs received on inactive slaves should only
+	 * be delivered to pkt handlers that are exact matches.  Also
+	 * the deliver_no_wcard flag will be set.  If packet handlers
+	 * are sensitive to duplicate packets these skbs will need to
+	 * be dropped at the handler.  The vlan accel path may have
+	 * already set the deliver_no_wcard flag.
+	 */
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
 	master = ACCESS_ONCE(orig_dev->master);
-	if (master) {
-		if (skb_bond_should_drop(skb, master))
+	if (skb->deliver_no_wcard)
+		null_or_orig = orig_dev;
+	else if (master) {
+		if (skb_bond_should_drop(skb, master)) {
+			skb->deliver_no_wcard = 1;
 			null_or_orig = orig_dev; /* deliver only exact match */
-		else
+		} else
 			skb->dev = master;
 	}
 
-- 
cgit v1.2.3-58-ga151


From 592fcb9dfafaa02dd0edc207bf5d3a0ee7a1f8df Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 16:21:07 +0000
Subject: ip: ip_ra_control() rcu fix

commit 66018506e15b (ip: Router Alert RCU conversion) introduced RCU
lookups to ip_call_ra_chain(). It missed proper deinit phase :
When ip_ra_control() deletes an ip_ra_chain, it should make sure
ip_call_ra_chain() users can not start to use socket during the rcu
grace period. It should also delay the sock_put() after the grace
period, or we risk a premature socket freeing and corruptions, as
raw sockets are not rcu protected yet.

This delay avoids using expensive atomic_inc_not_zero() in
ip_call_ra_chain().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  5 ++++-
 net/ipv4/ip_sockglue.c | 19 +++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 9982c97f0bdc..d52f01180361 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -61,7 +61,10 @@ struct ipcm_cookie {
 struct ip_ra_chain {
 	struct ip_ra_chain	*next;
 	struct sock		*sk;
-	void			(*destructor)(struct sock *);
+	union {
+		void			(*destructor)(struct sock *);
+		struct sock		*saved_sk;
+	};
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 08b9519a24f4..47fff528ff39 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -241,9 +241,13 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
 struct ip_ra_chain *ip_ra_chain;
 static DEFINE_SPINLOCK(ip_ra_lock);
 
-static void ip_ra_free_rcu(struct rcu_head *head)
+
+static void ip_ra_destroy_rcu(struct rcu_head *head)
 {
-	kfree(container_of(head, struct ip_ra_chain, rcu));
+	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
+
+	sock_put(ra->saved_sk);
+	kfree(ra);
 }
 
 int ip_ra_control(struct sock *sk, unsigned char on,
@@ -264,13 +268,20 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 				kfree(new_ra);
 				return -EADDRINUSE;
 			}
+			/* dont let ip_call_ra_chain() use sk again */
+			ra->sk = NULL;
 			rcu_assign_pointer(*rap, ra->next);
 			spin_unlock_bh(&ip_ra_lock);
 
 			if (ra->destructor)
 				ra->destructor(sk);
-			sock_put(sk);
-			call_rcu(&ra->rcu, ip_ra_free_rcu);
+			/*
+			 * Delay sock_put(sk) and kfree(ra) after one rcu grace
+			 * period. This guarantee ip_call_ra_chain() dont need
+			 * to mess with socket refcounts.
+			 */
+			ra->saved_sk = sk;
+			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
 			return 0;
 		}
 	}
-- 
cgit v1.2.3-58-ga151


From ae638c47dc040b8def16d05dc6acdd527628f231 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 8 Jun 2010 23:39:10 +0000
Subject: pkt_sched: gen_estimator: add a new lock

gen_kill_estimator() / gen_new_estimator() is not always called with
RTNL held.

net/netfilter/xt_RATEEST.c is one user of these API that do not hold
RTNL, so random corruptions can occur between "tc" and "iptables".

Add a new fine grained lock instead of trying to use RTNL in netfilter.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/gen_estimator.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index cf8e70392fe0..785e5276a300 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -107,6 +107,7 @@ static DEFINE_RWLOCK(est_lock);
 
 /* Protects against soft lockup during large deletion */
 static struct rb_root est_root = RB_ROOT;
+static DEFINE_SPINLOCK(est_tree_lock);
 
 static void est_timer(unsigned long arg)
 {
@@ -201,7 +202,6 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats
  *
  * Returns 0 on success or a negative error code.
  *
- * NOTE: Called under rtnl_mutex
  */
 int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 		      struct gnet_stats_rate_est *rate_est,
@@ -232,6 +232,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 	est->last_packets = bstats->packets;
 	est->avpps = rate_est->pps<<10;
 
+	spin_lock(&est_tree_lock);
 	if (!elist[idx].timer.function) {
 		INIT_LIST_HEAD(&elist[idx].list);
 		setup_timer(&elist[idx].timer, est_timer, idx);
@@ -242,6 +243,7 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
 
 	list_add_rcu(&est->list, &elist[idx].list);
 	gen_add_node(est);
+	spin_unlock(&est_tree_lock);
 
 	return 0;
 }
@@ -261,13 +263,13 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
- * NOTE: Called under rtnl_mutex
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
 {
 	struct gen_estimator *e;
 
+	spin_lock(&est_tree_lock);
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
@@ -278,6 +280,7 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
 	}
+	spin_unlock(&est_tree_lock);
 }
 EXPORT_SYMBOL(gen_kill_estimator);
 
@@ -312,8 +315,14 @@ EXPORT_SYMBOL(gen_replace_estimator);
 bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats,
 			  const struct gnet_stats_rate_est *rate_est)
 {
+	bool res;
+
 	ASSERT_RTNL();
 
-	return gen_find_node(bstats, rate_est) != NULL;
+	spin_lock(&est_tree_lock);
+	res = gen_find_node(bstats, rate_est) != NULL;
+	spin_unlock(&est_tree_lock);
+
+	return res;
 }
 EXPORT_SYMBOL(gen_estimator_active);
-- 
cgit v1.2.3-58-ga151


From 07a0f0f07a68014c92c752a5598102372bddf46e Mon Sep 17 00:00:00 2001
From: Daniel Turull <daniel.turull@gmail.com>
Date: Thu, 10 Jun 2010 23:08:11 -0700
Subject: pktgen: Fix accuracy of inter-packet delay.

This patch correct a bug in the delay of pktgen.
It makes sure the inter-packet interval is accurate.

Signed-off-by: Daniel Turull <daniel.turull@gmail.com>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 2ad68da418df..1dacd7ba8dbb 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2170,7 +2170,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	end_time = ktime_now();
 
 	pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time));
-	pkt_dev->next_tx = ktime_add_ns(end_time, pkt_dev->delay);
+	pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
 }
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
-- 
cgit v1.2.3-58-ga151


From d8d1f30b95a635dbd610dcc5eb641aca8f4768cf Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 10 Jun 2010 23:31:35 -0700
Subject: net-next: remove useless union keyword

remove useless union keyword in rtable, rt6_info and dn_route.

Since there is only one member in a union, the union keyword isn't useful.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c          |   2 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c   |   4 +-
 drivers/infiniband/hw/cxgb4/cm.c        |   4 +-
 drivers/infiniband/hw/nes/nes_cm.c      |   2 +-
 drivers/net/bonding/bond_main.c         |   6 +-
 drivers/net/cnic.c                      |   2 +-
 drivers/scsi/cxgb3i/cxgb3i_offload.c    |   4 +-
 include/net/dn_route.h                  |   4 +-
 include/net/ip6_fib.h                   |  10 +-
 include/net/ipip.h                      |   2 +-
 include/net/route.h                     |   6 +-
 net/atm/clip.c                          |   2 +-
 net/bridge/br_device.c                  |   2 +-
 net/bridge/br_netfilter.c               |  20 +-
 net/dccp/ipv4.c                         |   4 +-
 net/decnet/dn_route.c                   | 158 ++++++------
 net/ethernet/eth.c                      |   5 +-
 net/ipv4/af_inet.c                      |   4 +-
 net/ipv4/arp.c                          |  12 +-
 net/ipv4/datagram.c                     |   2 +-
 net/ipv4/icmp.c                         |  18 +-
 net/ipv4/igmp.c                         |  10 +-
 net/ipv4/inet_connection_sock.c         |   2 +-
 net/ipv4/ip_forward.c                   |  10 +-
 net/ipv4/ip_gre.c                       |  14 +-
 net/ipv4/ip_input.c                     |   4 +-
 net/ipv4/ip_output.c                    |  60 ++---
 net/ipv4/ipip.c                         |   8 +-
 net/ipv4/ipmr.c                         |   8 +-
 net/ipv4/netfilter.c                    |   8 +-
 net/ipv4/raw.c                          |  16 +-
 net/ipv4/route.c                        | 420 ++++++++++++++++----------------
 net/ipv4/syncookies.c                   |   6 +-
 net/ipv4/tcp_ipv4.c                     |   2 +-
 net/ipv4/udp.c                          |   4 +-
 net/ipv4/xfrm4_policy.c                 |   2 +-
 net/ipv6/addrconf.c                     |  10 +-
 net/ipv6/anycast.c                      |   6 +-
 net/ipv6/fib6_rules.c                   |  10 +-
 net/ipv6/ip6_fib.c                      |  30 +--
 net/ipv6/ip6_output.c                   |  38 +--
 net/ipv6/ip6_tunnel.c                   |   8 +-
 net/ipv6/mcast.c                        |   4 +-
 net/ipv6/ndisc.c                        |   8 +-
 net/ipv6/raw.c                          |  12 +-
 net/ipv6/route.c                        | 300 +++++++++++------------
 net/ipv6/sit.c                          |   8 +-
 net/l2tp/l2tp_ip.c                      |   6 +-
 net/netfilter/ipvs/ip_vs_xmit.c         |  86 +++----
 net/netfilter/nf_conntrack_h323_main.c  |  12 +-
 net/netfilter/nf_conntrack_netbios_ns.c |   2 +-
 net/netfilter/xt_TCPMSS.c               |   4 +-
 net/netfilter/xt_TEE.c                  |   4 +-
 net/rxrpc/ar-peer.c                     |   4 +-
 net/sctp/protocol.c                     |   4 +-
 55 files changed, 694 insertions(+), 709 deletions(-)

(limited to 'net')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 0b926e45afe2..a5ea1bce9689 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -215,7 +215,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 
 	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
 	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
-		neigh_event_send(rt->u.dst.neighbour, NULL);
+		neigh_event_send(rt->dst.neighbour, NULL);
 		ret = -ENODATA;
 		if (neigh)
 			goto release;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index ebfb117ba68b..abd683ea326d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1364,7 +1364,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		       __func__);
 		goto reject;
 	}
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	l2t = t3_l2t_get(tdev, dst->neighbour, dst->neighbour->dev);
 	if (!l2t) {
 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
@@ -1932,7 +1932,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
-	ep->dst = &rt->u.dst;
+	ep->dst = &rt->dst;
 
 	/* get a l2t entry */
 	ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst->neighbour,
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 30ce0a8eca09..8b693c8c25e2 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1364,7 +1364,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		       __func__);
 		goto reject;
 	}
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	if (dst->neighbour->dev->flags & IFF_LOOPBACK) {
 		pdev = ip_dev_find(&init_net, peer_ip);
 		BUG_ON(!pdev);
@@ -1938,7 +1938,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		err = -EHOSTUNREACH;
 		goto fail3;
 	}
-	ep->dst = &rt->u.dst;
+	ep->dst = &rt->dst;
 
 	/* get a l2t entry */
 	if (ep->dst->neighbour->dev->flags & IFF_LOOPBACK) {
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 986d6f32dded..d876d0435cd4 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1146,7 +1146,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 	}
 
 	if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID)))
-		neigh_event_send(rt->u.dst.neighbour, NULL);
+		neigh_event_send(rt->dst.neighbour, NULL);
 
 	ip_rt_put(rt);
 	return rc;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1b19276cff12..ac4f94b7da37 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2584,7 +2584,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		/*
 		 * This target is not on a VLAN
 		 */
-		if (rt->u.dst.dev == bond->dev) {
+		if (rt->dst.dev == bond->dev) {
 			ip_rt_put(rt);
 			pr_debug("basa: rtdev == bond->dev: arp_send\n");
 			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
@@ -2595,7 +2595,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		vlan_id = 0;
 		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
 			vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id);
-			if (vlan_dev == rt->u.dst.dev) {
+			if (vlan_dev == rt->dst.dev) {
 				vlan_id = vlan->vlan_id;
 				pr_debug("basa: vlan match on %s %d\n",
 				       vlan_dev->name, vlan_id);
@@ -2613,7 +2613,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 		if (net_ratelimit()) {
 			pr_warning("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
 				   bond->dev->name, &fl.fl4_dst,
-				   rt->u.dst.dev ? rt->u.dst.dev->name : "NULL");
+				   rt->dst.dev ? rt->dst.dev->name : "NULL");
 		}
 		ip_rt_put(rt);
 	}
diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index fe925663d39a..908d89a4fe86 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -2824,7 +2824,7 @@ static int cnic_get_v4_route(struct sockaddr_in *dst_addr,
 
 	err = ip_route_output_key(&init_net, &rt, &fl);
 	if (!err)
-		*dst = &rt->u.dst;
+		*dst = &rt->dst;
 	return err;
 #else
 	return -ENETUNREACH;
diff --git a/drivers/scsi/cxgb3i/cxgb3i_offload.c b/drivers/scsi/cxgb3i/cxgb3i_offload.c
index a175be9c496f..3b6a06eebf7f 100644
--- a/drivers/scsi/cxgb3i/cxgb3i_offload.c
+++ b/drivers/scsi/cxgb3i/cxgb3i_offload.c
@@ -1587,7 +1587,7 @@ cxgb3i_find_dev(struct net_device *dev, __be32 ipaddr)
 
 	err = ip_route_output_key(dev ? dev_net(dev) : &init_net, &rt, &fl);
 	if (!err)
-		return (&rt->u.dst)->dev;
+		return (&rt->dst)->dev;
 
 	return NULL;
 }
@@ -1649,7 +1649,7 @@ int cxgb3i_c3cn_connect(struct net_device *dev, struct s3_conn *c3cn,
 		c3cn->saddr.sin_addr.s_addr = rt->rt_src;
 
 	/* now commit destination to connection */
-	c3cn->dst_cache = &rt->u.dst;
+	c3cn->dst_cache = &rt->dst;
 
 	/* try to establish an offloaded connection */
 	dev = cxgb3_egress_dev(c3cn->dst_cache->dev, c3cn, 0);
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index 60c9f22d8694..ccadab3aa3f6 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -65,9 +65,7 @@ extern void dn_rt_cache_flush(int delay);
  * packets to the originating host.
  */
 struct dn_route {
-	union {
-		struct dst_entry dst;
-	} u;
+	struct dst_entry dst;
 
 	struct flowi fl;
 
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 4b1dc1161c37..062a823d311c 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -84,13 +84,11 @@ struct rt6key {
 struct fib6_table;
 
 struct rt6_info {
-	union {
-		struct dst_entry	dst;
-	} u;
+	struct dst_entry		dst;
 
-#define rt6i_dev			u.dst.dev
-#define rt6i_nexthop			u.dst.neighbour
-#define rt6i_expires			u.dst.expires
+#define rt6i_dev			dst.dev
+#define rt6i_nexthop			dst.neighbour
+#define rt6i_expires			dst.expires
 
 	/*
 	 * Tail elements of dst_entry (__refcnt etc.)
diff --git a/include/net/ipip.h b/include/net/ipip.h
index 11e8513d2d07..65caea8b414f 100644
--- a/include/net/ipip.h
+++ b/include/net/ipip.h
@@ -50,7 +50,7 @@ struct ip_tunnel_prl_entry {
 	int pkt_len = skb->len - skb_transport_offset(skb);		\
 									\
 	skb->ip_summed = CHECKSUM_NONE;					\
-	ip_select_ident(iph, &rt->u.dst, NULL);				\
+	ip_select_ident(iph, &rt->dst, NULL);				\
 									\
 	err = ip_local_out(skb);					\
 	if (likely(net_xmit_eval(err) == 0)) {				\
diff --git a/include/net/route.h b/include/net/route.h
index af6cf4b4c9dc..bd732d62e1c3 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -50,9 +50,7 @@
 struct fib_nh;
 struct inet_peer;
 struct rtable {
-	union {
-		struct dst_entry	dst;
-	} u;
+	struct dst_entry	dst;
 
 	/* Cache lookup keys */
 	struct flowi		fl;
@@ -144,7 +142,7 @@ extern void fib_add_ifaddr(struct in_ifaddr *);
 static inline void ip_rt_put(struct rtable * rt)
 {
 	if (rt)
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 }
 
 #define IPTOS_RT_MASK	(IPTOS_TOS_MASK & ~3)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 313aba11316b..95fdd1185067 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -522,7 +522,7 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	error = ip_route_output_key(&init_net, &rt, &fl);
 	if (error)
 		return error;
-	neigh = __neigh_lookup(&clip_tbl, &ip, rt->u.dst.dev, 1);
+	neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
 	ip_rt_put(rt);
 	if (!neigh)
 		return -ENOMEM;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eedf2c94820e..b898364beaf5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -127,7 +127,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu)
 
 #ifdef CONFIG_BRIDGE_NETFILTER
 	/* remember the MTU in the rtable for PMTU */
-	br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu;
+	br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu;
 #endif
 
 	return 0;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 44420992f72f..0685b2558ab5 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -117,12 +117,12 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 {
 	struct rtable *rt = &br->fake_rtable;
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.dev = br->dev;
-	rt->u.dst.path = &rt->u.dst;
-	rt->u.dst.metrics[RTAX_MTU - 1] = 1500;
-	rt->u.dst.flags	= DST_NOXFRM;
-	rt->u.dst.ops = &fake_dst_ops;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.dev = br->dev;
+	rt->dst.path = &rt->dst;
+	rt->dst.metrics[RTAX_MTU - 1] = 1500;
+	rt->dst.flags	= DST_NOXFRM;
+	rt->dst.ops = &fake_dst_ops;
 }
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
@@ -244,8 +244,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 		kfree_skb(skb);
 		return 0;
 	}
-	dst_hold(&rt->u.dst);
-	skb_dst_set(skb, &rt->u.dst);
+	dst_hold(&rt->dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_update_protocol(skb);
@@ -396,8 +396,8 @@ bridged_dnat:
 			kfree_skb(skb);
 			return 0;
 		}
-		dst_hold(&rt->u.dst);
-		skb_dst_set(skb, &rt->u.dst);
+		dst_hold(&rt->dst);
+		skb_dst_set(skb, &rt->dst);
 	}
 
 	skb->dev = nf_bridge->physindev;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d9b11ef8694c..d4a166f0f391 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -105,7 +105,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		goto failure;
 
 	/* OK, now commit destination to socket.  */
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
 						    inet->inet_daddr,
@@ -475,7 +475,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 		return NULL;
 	}
 
-	return &rt->u.dst;
+	return &rt->dst;
 }
 
 static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 812e6dff6067..6585ea6d1182 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -146,13 +146,13 @@ static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
 
 static inline void dnrt_free(struct dn_route *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void dnrt_drop(struct dn_route *rt)
 {
-	dst_release(&rt->u.dst);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	dst_release(&rt->dst);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static void dn_dst_check_expire(unsigned long dummy)
@@ -167,13 +167,13 @@ static void dn_dst_check_expire(unsigned long dummy)
 
 		spin_lock(&dn_rt_hash_table[i].lock);
 		while((rt=*rtp) != NULL) {
-			if (atomic_read(&rt->u.dst.__refcnt) ||
-					(now - rt->u.dst.lastuse) < expire) {
-				rtp = &rt->u.dst.dn_next;
+			if (atomic_read(&rt->dst.__refcnt) ||
+					(now - rt->dst.lastuse) < expire) {
+				rtp = &rt->dst.dn_next;
 				continue;
 			}
-			*rtp = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			*rtp = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dnrt_free(rt);
 		}
 		spin_unlock(&dn_rt_hash_table[i].lock);
@@ -198,13 +198,13 @@ static int dn_dst_gc(struct dst_ops *ops)
 		rtp = &dn_rt_hash_table[i].chain;
 
 		while((rt=*rtp) != NULL) {
-			if (atomic_read(&rt->u.dst.__refcnt) ||
-					(now - rt->u.dst.lastuse) < expire) {
-				rtp = &rt->u.dst.dn_next;
+			if (atomic_read(&rt->dst.__refcnt) ||
+					(now - rt->dst.lastuse) < expire) {
+				rtp = &rt->dst.dn_next;
 				continue;
 			}
-			*rtp = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			*rtp = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dnrt_drop(rt);
 			break;
 		}
@@ -287,25 +287,25 @@ static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route *
 	while((rth = *rthp) != NULL) {
 		if (compare_keys(&rth->fl, &rt->fl)) {
 			/* Put it first */
-			*rthp = rth->u.dst.dn_next;
-			rcu_assign_pointer(rth->u.dst.dn_next,
+			*rthp = rth->dst.dn_next;
+			rcu_assign_pointer(rth->dst.dn_next,
 					   dn_rt_hash_table[hash].chain);
 			rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 
 			dnrt_drop(rt);
 			*rp = rth;
 			return 0;
 		}
-		rthp = &rth->u.dst.dn_next;
+		rthp = &rth->dst.dn_next;
 	}
 
-	rcu_assign_pointer(rt->u.dst.dn_next, dn_rt_hash_table[hash].chain);
+	rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
 	rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
 
-	dst_use(&rt->u.dst, now);
+	dst_use(&rt->dst, now);
 	spin_unlock_bh(&dn_rt_hash_table[hash].lock);
 	*rp = rt;
 	return 0;
@@ -323,8 +323,8 @@ static void dn_run_flush(unsigned long dummy)
 			goto nothing_to_declare;
 
 		for(; rt; rt=next) {
-			next = rt->u.dst.dn_next;
-			rt->u.dst.dn_next = NULL;
+			next = rt->dst.dn_next;
+			rt->dst.dn_next = NULL;
 			dst_free((struct dst_entry *)rt);
 		}
 
@@ -743,7 +743,7 @@ static int dn_forward(struct sk_buff *skb)
 	/* Ensure that we have enough space for headers */
 	rt = (struct dn_route *)skb_dst(skb);
 	header_len = dn_db->use_long ? 21 : 6;
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
 		goto drop;
 
 	/*
@@ -752,7 +752,7 @@ static int dn_forward(struct sk_buff *skb)
 	if (++cb->hops > 30)
 		goto drop;
 
-	skb->dev = rt->u.dst.dev;
+	skb->dev = rt->dst.dev;
 
 	/*
 	 * If packet goes out same interface it came in on, then set
@@ -792,7 +792,7 @@ static int dn_rt_bug(struct sk_buff *skb)
 static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 {
 	struct dn_fib_info *fi = res->fi;
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 	struct neighbour *n;
 	unsigned mss;
 
@@ -800,25 +800,25 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
 		if (DN_FIB_RES_GW(*res) &&
 		    DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = DN_FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 	}
 	rt->rt_type = res->type;
 
-	if (dev != NULL && rt->u.dst.neighbour == NULL) {
+	if (dev != NULL && rt->dst.neighbour == NULL) {
 		n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
 		if (IS_ERR(n))
 			return PTR_ERR(n);
-		rt->u.dst.neighbour = n;
+		rt->dst.neighbour = n;
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
-		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
-	    dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
+	if (dst_metric(&rt->dst, RTAX_MTU) == 0 ||
+	    dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
+		rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 ||
+	    dst_metric(&rt->dst, RTAX_ADVMSS) > mss)
+		rt->dst.metrics[RTAX_ADVMSS-1] = mss;
 	return 0;
 }
 
@@ -1096,8 +1096,8 @@ make_route:
 	if (rt == NULL)
 		goto e_nobufs;
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.flags   = DST_HOST;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.flags   = DST_HOST;
 
 	rt->fl.fld_src    = oldflp->fld_src;
 	rt->fl.fld_dst    = oldflp->fld_dst;
@@ -1113,17 +1113,17 @@ make_route:
 	rt->rt_dst_map    = fl.fld_dst;
 	rt->rt_src_map    = fl.fld_src;
 
-	rt->u.dst.dev = dev_out;
+	rt->dst.dev = dev_out;
 	dev_hold(dev_out);
-	rt->u.dst.neighbour = neigh;
+	rt->dst.neighbour = neigh;
 	neigh = NULL;
 
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.output  = dn_output;
-	rt->u.dst.input   = dn_rt_bug;
+	rt->dst.lastuse = jiffies;
+	rt->dst.output  = dn_output;
+	rt->dst.input   = dn_rt_bug;
 	rt->rt_flags      = flags;
 	if (flags & RTCF_LOCAL)
-		rt->u.dst.input = dn_nsp_rx;
+		rt->dst.input = dn_nsp_rx;
 
 	err = dn_rt_set_next_hop(rt, &res);
 	if (err)
@@ -1152,7 +1152,7 @@ e_nobufs:
 	err = -ENOBUFS;
 	goto done;
 e_neighbour:
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	goto e_nobufs;
 }
 
@@ -1168,15 +1168,15 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *fl
 	if (!(flags & MSG_TRYHARD)) {
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
-			rt = rcu_dereference_bh(rt->u.dst.dn_next)) {
+			rt = rcu_dereference_bh(rt->dst.dn_next)) {
 			if ((flp->fld_dst == rt->fl.fld_dst) &&
 			    (flp->fld_src == rt->fl.fld_src) &&
 			    (flp->mark == rt->fl.mark) &&
 			    (rt->fl.iif == 0) &&
 			    (rt->fl.oif == flp->oif)) {
-				dst_use(&rt->u.dst, jiffies);
+				dst_use(&rt->dst, jiffies);
 				rcu_read_unlock_bh();
-				*pprt = &rt->u.dst;
+				*pprt = &rt->dst;
 				return 0;
 			}
 		}
@@ -1375,29 +1375,29 @@ make_route:
 	rt->fl.iif        = in_dev->ifindex;
 	rt->fl.mark       = fl.mark;
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.neighbour = neigh;
-	rt->u.dst.dev = out_dev;
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.output = dn_rt_bug;
+	rt->dst.flags = DST_HOST;
+	rt->dst.neighbour = neigh;
+	rt->dst.dev = out_dev;
+	rt->dst.lastuse = jiffies;
+	rt->dst.output = dn_rt_bug;
 	switch(res.type) {
 		case RTN_UNICAST:
-			rt->u.dst.input = dn_forward;
+			rt->dst.input = dn_forward;
 			break;
 		case RTN_LOCAL:
-			rt->u.dst.output = dn_output;
-			rt->u.dst.input = dn_nsp_rx;
-			rt->u.dst.dev = in_dev;
+			rt->dst.output = dn_output;
+			rt->dst.input = dn_nsp_rx;
+			rt->dst.dev = in_dev;
 			flags |= RTCF_LOCAL;
 			break;
 		default:
 		case RTN_UNREACHABLE:
 		case RTN_BLACKHOLE:
-			rt->u.dst.input = dst_discard;
+			rt->dst.input = dst_discard;
 	}
 	rt->rt_flags = flags;
-	if (rt->u.dst.dev)
-		dev_hold(rt->u.dst.dev);
+	if (rt->dst.dev)
+		dev_hold(rt->dst.dev);
 
 	err = dn_rt_set_next_hop(rt, &res);
 	if (err)
@@ -1405,7 +1405,7 @@ make_route:
 
 	hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst);
 	dn_insert_route(rt, hash, &rt);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 done:
 	if (neigh)
@@ -1427,7 +1427,7 @@ e_nobufs:
 	goto done;
 
 e_neighbour:
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	goto done;
 }
 
@@ -1442,13 +1442,13 @@ static int dn_route_input(struct sk_buff *skb)
 
 	rcu_read_lock();
 	for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
-	    rt = rcu_dereference(rt->u.dst.dn_next)) {
+	    rt = rcu_dereference(rt->dst.dn_next)) {
 		if ((rt->fl.fld_src == cb->src) &&
 		    (rt->fl.fld_dst == cb->dst) &&
 		    (rt->fl.oif == 0) &&
 		    (rt->fl.mark == skb->mark) &&
 		    (rt->fl.iif == cb->iif)) {
-			dst_use(&rt->u.dst, jiffies);
+			dst_use(&rt->dst, jiffies);
 			rcu_read_unlock();
 			skb_dst_set(skb, (struct dst_entry *)rt);
 			return 0;
@@ -1487,8 +1487,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 		r->rtm_src_len = 16;
 		RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src);
 	}
-	if (rt->u.dst.dev)
-		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->dst.dev->ifindex);
 	/*
 	 * Note to self - change this if input routes reverse direction when
 	 * they deal only with inputs and not with replies like they do
@@ -1497,11 +1497,11 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
 	RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src);
 	if (rt->rt_daddr != rt->rt_gateway)
 		RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway);
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto rtattr_failure;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires,
-			       rt->u.dst.error) < 0)
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires,
+			       rt->dst.error) < 0)
 		goto rtattr_failure;
 	if (rt->fl.iif)
 		RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
@@ -1568,8 +1568,8 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 		local_bh_enable();
 		memset(cb, 0, sizeof(struct dn_skb_cb));
 		rt = (struct dn_route *)skb_dst(skb);
-		if (!err && -rt->u.dst.error)
-			err = rt->u.dst.error;
+		if (!err && -rt->dst.error)
+			err = rt->dst.error;
 	} else {
 		int oif = 0;
 		if (rta[RTA_OIF - 1])
@@ -1583,7 +1583,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 	skb->dev = NULL;
 	if (err)
 		goto out_free;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -1632,10 +1632,10 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		rcu_read_lock_bh();
 		for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
 			rt;
-			rt = rcu_dereference_bh(rt->u.dst.dn_next), idx++) {
+			rt = rcu_dereference_bh(rt->dst.dn_next), idx++) {
 			if (idx < s_idx)
 				continue;
-			skb_dst_set(skb, dst_clone(&rt->u.dst));
+			skb_dst_set(skb, dst_clone(&rt->dst));
 			if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
 					cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					1, NLM_F_MULTI) <= 0) {
@@ -1678,7 +1678,7 @@ static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_rou
 {
 	struct dn_rt_cache_iter_state *s = seq->private;
 
-	rt = rt->u.dst.dn_next;
+	rt = rt->dst.dn_next;
 	while(!rt) {
 		rcu_read_unlock_bh();
 		if (--s->bucket < 0)
@@ -1719,12 +1719,12 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
 	char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
 
 	seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
-			rt->u.dst.dev ? rt->u.dst.dev->name : "*",
+			rt->dst.dev ? rt->dst.dev->name : "*",
 			dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
 			dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
-			atomic_read(&rt->u.dst.__refcnt),
-			rt->u.dst.__use,
-			(int) dst_metric(&rt->u.dst, RTAX_RTT));
+			atomic_read(&rt->dst.__refcnt),
+			rt->dst.__use,
+			(int) dst_metric(&rt->dst, RTAX_RTT));
 	return 0;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 61ec0329316c..215c83986a9d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -158,7 +158,6 @@ EXPORT_SYMBOL(eth_rebuild_header);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ethhdr *eth;
-	unsigned char *rawp;
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
@@ -199,15 +198,13 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	if (ntohs(eth->h_proto) >= 1536)
 		return eth->h_proto;
 
-	rawp = skb->data;
-
 	/*
 	 *      This is a magic hack to spot IPX packets. Older Novell breaks
 	 *      the protocol design and runs IPX over 802.3 without an 802.2 LLC
 	 *      layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
 	 *      won't work for fault tolerant netware but does for the rest.
 	 */
-	if (*(unsigned short *)rawp == 0xFFFF)
+	if (skb->len >= 2 && *(unsigned short *)(skb->data) == 0xFFFF)
 		return htons(ETH_P_802_3);
 
 	/*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 551ce564b035..d99e7e020189 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1100,7 +1100,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
 	if (err)
 		return err;
 
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	new_saddr = rt->rt_src;
 
@@ -1166,7 +1166,7 @@ int inet_sk_rebuild_header(struct sock *sk)
 	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
 }
 	if (!err)
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	else {
 		/* Routing failed... */
 		sk->sk_route_caps = 0;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 917d2d66162e..cf78f41830ca 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -427,7 +427,7 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
 
 	if (ip_route_output_key(net, &rt, &fl) < 0)
 		return 1;
-	if (rt->u.dst.dev != dev) {
+	if (rt->dst.dev != dev) {
 		NET_INC_STATS_BH(net, LINUX_MIB_ARPFILTER);
 		flag = 1;
 	}
@@ -532,7 +532,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 	struct in_device *out_dev;
 	int imi, omi = -1;
 
-	if (rt->u.dst.dev == dev)
+	if (rt->dst.dev == dev)
 		return 0;
 
 	if (!IN_DEV_PROXY_ARP(in_dev))
@@ -545,7 +545,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
 
 	/* place to check for proxy_arp for routes */
 
-	out_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	out_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (out_dev)
 		omi = IN_DEV_MEDIUM_ID(out_dev);
 
@@ -576,7 +576,7 @@ static inline int arp_fwd_pvlan(struct in_device *in_dev,
 				__be32 sip, __be32 tip)
 {
 	/* Private VLAN is only concerned about the same ethernet segment */
-	if (rt->u.dst.dev != dev)
+	if (rt->dst.dev != dev)
 		return 0;
 
 	/* Don't reply on self probes (often done by windowz boxes)*/
@@ -1042,7 +1042,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
@@ -1149,7 +1149,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
 		struct rtable * rt;
 		if ((err = ip_route_output_key(net, &rt, &fl)) != 0)
 			return err;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (!dev)
 			return -EINVAL;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fb2465811b48..fe3daa7f07a9 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -69,7 +69,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 	return(0);
 }
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index bdb6c71e72a6..7569b21a3a2d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -271,7 +271,7 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
 static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		int type, int code)
 {
-	struct dst_entry *dst = &rt->u.dst;
+	struct dst_entry *dst = &rt->dst;
 	int rc = 1;
 
 	if (type > NR_ICMP_TYPES)
@@ -327,7 +327,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	struct sock *sk;
 	struct sk_buff *skb;
 
-	sk = icmp_sk(dev_net((*rt)->u.dst.dev));
+	sk = icmp_sk(dev_net((*rt)->dst.dev));
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
@@ -359,7 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
 {
 	struct ipcm_cookie ipc;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 	struct sock *sk;
 	struct inet_sock *inet;
 	__be32 daddr;
@@ -427,7 +427,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 
 	if (!rt)
 		goto out;
-	net = dev_net(rt->u.dst.dev);
+	net = dev_net(rt->dst.dev);
 
 	/*
 	 *	Find the original header. It is expected to be valid, of course.
@@ -596,9 +596,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			/* Ugh! */
 			orefdst = skb_in->_skb_refdst; /* save old refdst */
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
-					     RT_TOS(tos), rt2->u.dst.dev);
+					     RT_TOS(tos), rt2->dst.dev);
 
-			dst_release(&rt2->u.dst);
+			dst_release(&rt2->dst);
 			rt2 = skb_rtable(skb_in);
 			skb_in->_skb_refdst = orefdst; /* restore old refdst */
 		}
@@ -610,7 +610,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 				  XFRM_LOOKUP_ICMP);
 		switch (err) {
 		case 0:
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			rt = rt2;
 			break;
 		case -EPERM:
@@ -629,7 +629,7 @@ route_done:
 
 	/* RFC says return as much as we can without exceeding 576 bytes. */
 
-	room = dst_mtu(&rt->u.dst);
+	room = dst_mtu(&rt->dst);
 	if (room > 576)
 		room = 576;
 	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
@@ -972,7 +972,7 @@ int icmp_rcv(struct sk_buff *skb)
 {
 	struct icmphdr *icmph;
 	struct rtable *rt = skb_rtable(skb);
-	struct net *net = dev_net(rt->u.dst.dev);
+	struct net *net = dev_net(rt->dst.dev);
 
 	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
 		struct sec_path *sp = skb_sec_path(skb);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 3294f547c481..b5580d422994 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -312,7 +312,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 		return NULL;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	skb->dev = dev;
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
@@ -330,7 +330,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
 	pip->saddr    = rt->rt_src;
 	pip->protocol = IPPROTO_IGMP;
 	pip->tot_len  = 0;	/* filled in later */
-	ip_select_ident(pip, &rt->u.dst, NULL);
+	ip_select_ident(pip, &rt->dst, NULL);
 	((u8*)&pip[1])[0] = IPOPT_RA;
 	((u8*)&pip[1])[1] = 4;
 	((u8*)&pip[1])[2] = 0;
@@ -660,7 +660,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 		return -1;
 	}
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
 
@@ -676,7 +676,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
 	iph->daddr    = dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = IPPROTO_IGMP;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 	((u8*)&iph[1])[0] = IPOPT_RA;
 	((u8*)&iph[1])[1] = 4;
 	((u8*)&iph[1])[2] = 0;
@@ -1425,7 +1425,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
 	}
 
 	if (!dev && !ip_route_output_key(net, &rt, &fl)) {
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 	}
 	if (dev) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 70eb3507c406..57c9e4d7b805 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -383,7 +383,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
 		goto no_route;
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto route_err;
-	return &rt->u.dst;
+	return &rt->dst;
 
 route_err:
 	ip_rt_put(rt);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 56cdf68a074c..99461f09320f 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -87,16 +87,16 @@ int ip_forward(struct sk_buff *skb)
 	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
 		goto sr_failed;
 
-	if (unlikely(skb->len > dst_mtu(&rt->u.dst) && !skb_is_gso(skb) &&
+	if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
 		     (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
-		IP_INC_STATS(dev_net(rt->u.dst.dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-			  htonl(dst_mtu(&rt->u.dst)));
+			  htonl(dst_mtu(&rt->dst)));
 		goto drop;
 	}
 
 	/* We are about to mangle packet. Copy it! */
-	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+	if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+rt->dst.header_len))
 		goto drop;
 	iph = ip_hdr(skb);
 
@@ -113,7 +113,7 @@ int ip_forward(struct sk_buff *skb)
 	skb->priority = rt_tos2priority(iph->tos);
 
 	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
-		       rt->u.dst.dev, ip_forward_finish);
+		       rt->dst.dev, ip_forward_finish);
 
 sr_failed:
 	/*
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 32618e11076d..749e54889e82 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -745,7 +745,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -755,7 +755,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 
 	df = tiph->frag_off;
 	if (df)
-		mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
+		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen;
 	else
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
@@ -803,7 +803,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			tunnel->err_count = 0;
 	}
 
-	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
+	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len;
 
 	if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
 	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
@@ -830,7 +830,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -853,7 +853,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
 #endif
 		else
-			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+			iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT);
 	}
 
 	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
@@ -915,7 +915,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_GRE };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 
@@ -1174,7 +1174,7 @@ static int ipgre_open(struct net_device *dev)
 		struct rtable *rt;
 		if (ip_route_output_key(dev_net(dev), &rt, &fl))
 			return -EADDRNOTAVAIL;
-		dev = rt->u.dst.dev;
+		dev = rt->dst.dev;
 		ip_rt_put(rt);
 		if (__in_dev_get_rtnl(dev) == NULL)
 			return -EADDRNOTAVAIL;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 08a3b121f908..db47a5a00ed2 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -356,10 +356,10 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt->rt_type == RTN_MULTICAST) {
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INMCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
 				skb->len);
 	} else if (rt->rt_type == RTN_BROADCAST)
-		IP_UPD_PO_STATS_BH(dev_net(rt->u.dst.dev), IPSTATS_MIB_INBCAST,
+		IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
 				skb->len);
 
 	return dst_input(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9a4a6c96cb0d..6cbeb2e108de 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -151,15 +151,15 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	iph->version  = 4;
 	iph->ihl      = 5;
 	iph->tos      = inet->tos;
-	if (ip_dont_fragment(sk, &rt->u.dst))
+	if (ip_dont_fragment(sk, &rt->dst))
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->daddr    = rt->rt_dst;
 	iph->saddr    = rt->rt_src;
 	iph->protocol = sk->sk_protocol;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 
 	if (opt && opt->optlen) {
 		iph->ihl += opt->optlen>>2;
@@ -240,7 +240,7 @@ int ip_mc_output(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct rtable *rt = skb_rtable(skb);
-	struct net_device *dev = rt->u.dst.dev;
+	struct net_device *dev = rt->dst.dev;
 
 	/*
 	 *	If the indicated interface is up and running, send the packet.
@@ -359,9 +359,9 @@ int ip_queue_xmit(struct sk_buff *skb)
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set_noref(skb, &rt->u.dst);
+	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -372,11 +372,11 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
-	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
+	iph->ttl      = ip_select_ttl(inet, &rt->dst);
 	iph->protocol = sk->sk_protocol;
 	iph->saddr    = rt->rt_src;
 	iph->daddr    = rt->rt_dst;
@@ -387,7 +387,7 @@ packet_routed:
 		ip_options_build(skb, opt, inet->inet_daddr, rt, 0);
 	}
 
-	ip_select_ident_more(iph, &rt->u.dst, sk,
+	ip_select_ident_more(iph, &rt->dst, sk,
 			     (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
 	skb->priority = sk->sk_priority;
@@ -452,7 +452,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	struct rtable *rt = skb_rtable(skb);
 	int err = 0;
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
 	/*
 	 *	Point into the IP datagram header.
@@ -473,7 +473,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	 */
 
 	hlen = iph->ihl * 4;
-	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	mtu = dst_mtu(&rt->dst) - hlen;	/* Size of data space */
 #ifdef CONFIG_BRIDGE_NETFILTER
 	if (skb->nf_bridge)
 		mtu -= nf_bridge_mtu_reduction(skb);
@@ -586,7 +586,7 @@ slow_path:
 	 * we need to make room for the encapsulating header
 	 */
 	pad = nf_bridge_pad(skb);
-	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad);
+	ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, pad);
 	mtu -= pad;
 
 	/*
@@ -833,13 +833,13 @@ int ip_append_data(struct sock *sk,
 		 */
 		*rtp = NULL;
 		inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ?
-					    rt->u.dst.dev->mtu :
-					    dst_mtu(rt->u.dst.path);
-		inet->cork.dst = &rt->u.dst;
+					    rt->dst.dev->mtu :
+					    dst_mtu(rt->dst.path);
+		inet->cork.dst = &rt->dst;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		if ((exthdrlen = rt->u.dst.header_len) != 0) {
+		if ((exthdrlen = rt->dst.header_len) != 0) {
 			length += exthdrlen;
 			transhdrlen += exthdrlen;
 		}
@@ -852,7 +852,7 @@ int ip_append_data(struct sock *sk,
 		exthdrlen = 0;
 		mtu = inet->cork.fragsize;
 	}
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
@@ -869,14 +869,14 @@ int ip_append_data(struct sock *sk,
 	 */
 	if (transhdrlen &&
 	    length + fragheaderlen <= mtu &&
-	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
+	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
 	inet->cork.length += length;
 	if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
 					 fragheaderlen, transhdrlen, mtu,
 					 flags);
@@ -924,7 +924,7 @@ alloc_new_skb:
 			fraglen = datalen + fragheaderlen;
 
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
@@ -935,7 +935,7 @@ alloc_new_skb:
 			 * the last.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+				alloclen += rt->dst.trailer_len;
 
 			if (transhdrlen) {
 				skb = sock_alloc_send_skb(sk,
@@ -1008,7 +1008,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1103,10 +1103,10 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 	if (inet->cork.flags & IPCORK_OPT)
 		opt = inet->cork.opt;
 
-	if (!(rt->u.dst.dev->features&NETIF_F_SG))
+	if (!(rt->dst.dev->features&NETIF_F_SG))
 		return -EOPNOTSUPP;
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 	mtu = inet->cork.fragsize;
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1122,7 +1122,7 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 
 	inet->cork.length += size;
 	if ((sk->sk_protocol == IPPROTO_UDP) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	    (rt->dst.dev->features & NETIF_F_UFO)) {
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
 	}
@@ -1274,8 +1274,8 @@ int ip_push_pending_frames(struct sock *sk)
 	 * If local_df is set too, we still allow to fragment this frame
 	 * locally. */
 	if (inet->pmtudisc >= IP_PMTUDISC_DO ||
-	    (skb->len <= dst_mtu(&rt->u.dst) &&
-	     ip_dont_fragment(sk, &rt->u.dst)))
+	    (skb->len <= dst_mtu(&rt->dst) &&
+	     ip_dont_fragment(sk, &rt->dst)))
 		df = htons(IP_DF);
 
 	if (inet->cork.flags & IPCORK_OPT)
@@ -1284,7 +1284,7 @@ int ip_push_pending_frames(struct sock *sk)
 	if (rt->rt_type == RTN_MULTICAST)
 		ttl = inet->mc_ttl;
 	else
-		ttl = ip_select_ttl(inet, &rt->u.dst);
+		ttl = ip_select_ttl(inet, &rt->dst);
 
 	iph = (struct iphdr *)skb->data;
 	iph->version = 4;
@@ -1295,7 +1295,7 @@ int ip_push_pending_frames(struct sock *sk)
 	}
 	iph->tos = inet->tos;
 	iph->frag_off = df;
-	ip_select_ident(iph, &rt->u.dst, sk);
+	ip_select_ident(iph, &rt->dst, sk);
 	iph->ttl = ttl;
 	iph->protocol = sk->sk_protocol;
 	iph->saddr = rt->rt_src;
@@ -1308,7 +1308,7 @@ int ip_push_pending_frames(struct sock *sk)
 	 * on dst refcount
 	 */
 	inet->cork.dst = NULL;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	if (iph->protocol == IPPROTO_ICMP)
 		icmp_out_count(net, ((struct icmphdr *)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 7fd636711037..ec036731a70b 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -435,7 +435,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto tx_error_icmp;
 		}
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -446,7 +446,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	df |= old_iph->frag_off & htons(IP_DF);
 
 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
 		if (mtu < 68) {
 			stats->collisions++;
@@ -503,7 +503,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_IPIP };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 		dev->flags |= IFF_POINTOPOINT;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 856123fe32f9..8418afc357ee 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1551,9 +1551,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 			goto out_free;
 	}
 
-	dev = rt->u.dst.dev;
+	dev = rt->dst.dev;
 
-	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
 		/* Do not fragment multicasts. Alas, IPv4 does not
 		   allow to send ICMP, so that packets will disappear
 		   to blackhole.
@@ -1564,7 +1564,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		goto out_free;
 	}
 
-	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
+	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
 
 	if (skb_cow(skb, encap)) {
 		ip_rt_put(rt);
@@ -1575,7 +1575,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 	vif->bytes_out += skb->len;
 
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	ip_decrease_ttl(ip_hdr(skb));
 
 	/* FIXME: forward and output firewalls used to be called here.
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 07de855e2175..cfbc79af21c3 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -43,7 +43,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 		/* Drop old route. */
 		skb_dst_drop(skb);
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	} else {
 		/* non-local src, find valid iif to satisfy
 		 * rp-filter when calling ip_route_input. */
@@ -53,11 +53,11 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 
 		orefdst = skb->_skb_refdst;
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
-				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
-			dst_release(&rt->u.dst);
+				   RT_TOS(iph->tos), rt->dst.dev) != 0) {
+			dst_release(&rt->dst);
 			return -1;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		refdst_drop(orefdst);
 	}
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 66cc3befcd44..009a7b2aa1ef 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -325,24 +325,24 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 	int err;
 	struct rtable *rt = *rtp;
 
-	if (length > rt->u.dst.dev->mtu) {
+	if (length > rt->dst.dev->mtu) {
 		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->inet_dport,
-			       rt->u.dst.dev->mtu);
+			       rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
 
 	skb_reset_network_header(skb);
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		iph->check   = 0;
 		iph->tot_len = htons(length);
 		if (!iph->id)
-			ip_select_ident(iph, &rt->u.dst, NULL);
+			ip_select_ident(iph, &rt->dst, NULL);
 
 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 	}
@@ -384,7 +384,7 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 			skb_transport_header(skb))->type);
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
-		      rt->u.dst.dev, dst_output);
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
@@ -606,7 +606,7 @@ out:
 	return len;
 
 do_confirm:
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 	if (!(msg->msg_flags & MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 883b5c7195ac..a291edbbc97f 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -286,10 +286,10 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
 		rcu_read_lock_bh();
 		r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
 		while (r) {
-			if (dev_net(r->u.dst.dev) == seq_file_net(seq) &&
+			if (dev_net(r->dst.dev) == seq_file_net(seq) &&
 			    r->rt_genid == st->genid)
 				return r;
-			r = rcu_dereference_bh(r->u.dst.rt_next);
+			r = rcu_dereference_bh(r->dst.rt_next);
 		}
 		rcu_read_unlock_bh();
 	}
@@ -301,7 +301,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 
-	r = r->u.dst.rt_next;
+	r = r->dst.rt_next;
 	while (!r) {
 		rcu_read_unlock_bh();
 		do {
@@ -319,7 +319,7 @@ static struct rtable *rt_cache_get_next(struct seq_file *seq,
 {
 	struct rt_cache_iter_state *st = seq->private;
 	while ((r = __rt_cache_get_next(seq, r)) != NULL) {
-		if (dev_net(r->u.dst.dev) != seq_file_net(seq))
+		if (dev_net(r->dst.dev) != seq_file_net(seq))
 			continue;
 		if (r->rt_genid == st->genid)
 			break;
@@ -377,19 +377,19 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
 			      "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
-			r->u.dst.dev ? r->u.dst.dev->name : "*",
+			r->dst.dev ? r->dst.dev->name : "*",
 			(__force u32)r->rt_dst,
 			(__force u32)r->rt_gateway,
-			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
-			r->u.dst.__use, 0, (__force u32)r->rt_src,
-			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
-			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
-			dst_metric(&r->u.dst, RTAX_WINDOW),
-			(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3) +
-			      dst_metric(&r->u.dst, RTAX_RTTVAR)),
+			r->rt_flags, atomic_read(&r->dst.__refcnt),
+			r->dst.__use, 0, (__force u32)r->rt_src,
+			(dst_metric(&r->dst, RTAX_ADVMSS) ?
+			     (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0),
+			dst_metric(&r->dst, RTAX_WINDOW),
+			(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
+			      dst_metric(&r->dst, RTAX_RTTVAR)),
 			r->fl.fl4_tos,
-			r->u.dst.hh ? atomic_read(&r->u.dst.hh->hh_refcnt) : -1,
-			r->u.dst.hh ? (r->u.dst.hh->hh_output ==
+			r->dst.hh ? atomic_read(&r->dst.hh->hh_refcnt) : -1,
+			r->dst.hh ? (r->dst.hh->hh_output ==
 				       dev_queue_xmit) : 0,
 			r->rt_spec_dst, &len);
 
@@ -608,13 +608,13 @@ static inline int ip_rt_proc_init(void)
 
 static inline void rt_free(struct rtable *rt)
 {
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline void rt_drop(struct rtable *rt)
 {
 	ip_rt_put(rt);
-	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+	call_rcu_bh(&rt->dst.rcu_head, dst_rcu_free);
 }
 
 static inline int rt_fast_clean(struct rtable *rth)
@@ -622,13 +622,13 @@ static inline int rt_fast_clean(struct rtable *rth)
 	/* Kill broadcast/multicast entries very aggresively, if they
 	   collide in hash table with more useful entries */
 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-		rth->fl.iif && rth->u.dst.rt_next;
+		rth->fl.iif && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
 {
 	return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) ||
-		rth->u.dst.expires;
+		rth->dst.expires;
 }
 
 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2)
@@ -636,15 +636,15 @@ static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long t
 	unsigned long age;
 	int ret = 0;
 
-	if (atomic_read(&rth->u.dst.__refcnt))
+	if (atomic_read(&rth->dst.__refcnt))
 		goto out;
 
 	ret = 1;
-	if (rth->u.dst.expires &&
-	    time_after_eq(jiffies, rth->u.dst.expires))
+	if (rth->dst.expires &&
+	    time_after_eq(jiffies, rth->dst.expires))
 		goto out;
 
-	age = jiffies - rth->u.dst.lastuse;
+	age = jiffies - rth->dst.lastuse;
 	ret = 0;
 	if ((age <= tmo1 && !rt_fast_clean(rth)) ||
 	    (age <= tmo2 && rt_valuable(rth)))
@@ -660,7 +660,7 @@ out:	return ret;
  */
 static inline u32 rt_score(struct rtable *rt)
 {
-	u32 score = jiffies - rt->u.dst.lastuse;
+	u32 score = jiffies - rt->dst.lastuse;
 
 	score = ~score & ~(3<<30);
 
@@ -700,12 +700,12 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 
 static inline int compare_netns(struct rtable *rt1, struct rtable *rt2)
 {
-	return net_eq(dev_net(rt1->u.dst.dev), dev_net(rt2->u.dst.dev));
+	return net_eq(dev_net(rt1->dst.dev), dev_net(rt2->dst.dev));
 }
 
 static inline int rt_is_expired(struct rtable *rth)
 {
-	return rth->rt_genid != rt_genid(dev_net(rth->u.dst.dev));
+	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
 }
 
 /*
@@ -734,7 +734,7 @@ static void rt_do_flush(int process_context)
 		rth = rt_hash_table[i].chain;
 
 		/* defer releasing the head of the list after spin_unlock */
-		for (tail = rth; tail; tail = tail->u.dst.rt_next)
+		for (tail = rth; tail; tail = tail->dst.rt_next)
 			if (!rt_is_expired(tail))
 				break;
 		if (rth != tail)
@@ -743,9 +743,9 @@ static void rt_do_flush(int process_context)
 		/* call rt_free on entries after the tail requiring flush */
 		prev = &rt_hash_table[i].chain;
 		for (p = *prev; p; p = next) {
-			next = p->u.dst.rt_next;
+			next = p->dst.rt_next;
 			if (!rt_is_expired(p)) {
-				prev = &p->u.dst.rt_next;
+				prev = &p->dst.rt_next;
 			} else {
 				*prev = next;
 				rt_free(p);
@@ -760,7 +760,7 @@ static void rt_do_flush(int process_context)
 		spin_unlock_bh(rt_hash_lock_addr(i));
 
 		for (; rth != tail; rth = next) {
-			next = rth->u.dst.rt_next;
+			next = rth->dst.rt_next;
 			rt_free(rth);
 		}
 	}
@@ -791,7 +791,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
 	while (aux != rth) {
 		if (compare_hash_inputs(&aux->fl, &rth->fl))
 			return 0;
-		aux = aux->u.dst.rt_next;
+		aux = aux->dst.rt_next;
 	}
 	return ONE;
 }
@@ -831,18 +831,18 @@ static void rt_check_expire(void)
 		length = 0;
 		spin_lock_bh(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
-			prefetch(rth->u.dst.rt_next);
+			prefetch(rth->dst.rt_next);
 			if (rt_is_expired(rth)) {
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				continue;
 			}
-			if (rth->u.dst.expires) {
+			if (rth->dst.expires) {
 				/* Entry is expired even if it is in use */
-				if (time_before_eq(jiffies, rth->u.dst.expires)) {
+				if (time_before_eq(jiffies, rth->dst.expires)) {
 nofree:
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					/*
 					 * We only count entries on
 					 * a chain with equal hash inputs once
@@ -858,7 +858,7 @@ nofree:
 				goto nofree;
 
 			/* Cleanup aged off entries. */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 		}
 		spin_unlock_bh(rt_hash_lock_addr(i));
@@ -999,10 +999,10 @@ static int rt_garbage_collect(struct dst_ops *ops)
 				if (!rt_is_expired(rth) &&
 					!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
-					rthp = &rth->u.dst.rt_next;
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
-				*rthp = rth->u.dst.rt_next;
+				*rthp = rth->dst.rt_next;
 				rt_free(rth);
 				goal--;
 			}
@@ -1068,7 +1068,7 @@ static int slow_chain_length(const struct rtable *head)
 
 	while (rth) {
 		length += has_noalias(head, rth);
-		rth = rth->u.dst.rt_next;
+		rth = rth->dst.rt_next;
 	}
 	return length >> FRACT_BITS;
 }
@@ -1090,7 +1090,7 @@ restart:
 	candp = NULL;
 	now = jiffies;
 
-	if (!rt_caching(dev_net(rt->u.dst.dev))) {
+	if (!rt_caching(dev_net(rt->dst.dev))) {
 		/*
 		 * If we're not caching, just tell the caller we
 		 * were successful and don't touch the route.  The
@@ -1108,7 +1108,7 @@ restart:
 		 */
 
 		if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-			int err = arp_bind_neighbour(&rt->u.dst);
+			int err = arp_bind_neighbour(&rt->dst);
 			if (err) {
 				if (net_ratelimit())
 					printk(KERN_WARNING
@@ -1127,19 +1127,19 @@ restart:
 	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
 		if (rt_is_expired(rth)) {
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			rt_free(rth);
 			continue;
 		}
 		if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {
 			/* Put it first */
-			*rthp = rth->u.dst.rt_next;
+			*rthp = rth->dst.rt_next;
 			/*
 			 * Since lookup is lockfree, the deletion
 			 * must be visible to another weakly ordered CPU before
 			 * the insertion at the start of the hash chain.
 			 */
-			rcu_assign_pointer(rth->u.dst.rt_next,
+			rcu_assign_pointer(rth->dst.rt_next,
 					   rt_hash_table[hash].chain);
 			/*
 			 * Since lookup is lockfree, the update writes
@@ -1147,18 +1147,18 @@ restart:
 			 */
 			rcu_assign_pointer(rt_hash_table[hash].chain, rth);
 
-			dst_use(&rth->u.dst, now);
+			dst_use(&rth->dst, now);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
 			if (rp)
 				*rp = rth;
 			else
-				skb_dst_set(skb, &rth->u.dst);
+				skb_dst_set(skb, &rth->dst);
 			return 0;
 		}
 
-		if (!atomic_read(&rth->u.dst.__refcnt)) {
+		if (!atomic_read(&rth->dst.__refcnt)) {
 			u32 score = rt_score(rth);
 
 			if (score <= min_score) {
@@ -1170,7 +1170,7 @@ restart:
 
 		chain_length++;
 
-		rthp = &rth->u.dst.rt_next;
+		rthp = &rth->dst.rt_next;
 	}
 
 	if (cand) {
@@ -1181,17 +1181,17 @@ restart:
 		 * only 2 entries per bucket. We will see.
 		 */
 		if (chain_length > ip_rt_gc_elasticity) {
-			*candp = cand->u.dst.rt_next;
+			*candp = cand->dst.rt_next;
 			rt_free(cand);
 		}
 	} else {
 		if (chain_length > rt_chain_length_max &&
 		    slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {
-			struct net *net = dev_net(rt->u.dst.dev);
+			struct net *net = dev_net(rt->dst.dev);
 			int num = ++net->ipv4.current_rt_cache_rebuild_count;
 			if (!rt_caching(net)) {
 				printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n",
-					rt->u.dst.dev->name, num);
+					rt->dst.dev->name, num);
 			}
 			rt_emergency_hash_rebuild(net);
 			spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1206,7 +1206,7 @@ restart:
 	   route or unicast forwarding path.
 	 */
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
-		int err = arp_bind_neighbour(&rt->u.dst);
+		int err = arp_bind_neighbour(&rt->dst);
 		if (err) {
 			spin_unlock_bh(rt_hash_lock_addr(hash));
 
@@ -1237,14 +1237,14 @@ restart:
 		}
 	}
 
-	rt->u.dst.rt_next = rt_hash_table[hash].chain;
+	rt->dst.rt_next = rt_hash_table[hash].chain;
 
 #if RT_CACHE_DEBUG >= 2
-	if (rt->u.dst.rt_next) {
+	if (rt->dst.rt_next) {
 		struct rtable *trt;
 		printk(KERN_DEBUG "rt_cache @%02x: %pI4",
 		       hash, &rt->rt_dst);
-		for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next)
+		for (trt = rt->dst.rt_next; trt; trt = trt->dst.rt_next)
 			printk(" . %pI4", &trt->rt_dst);
 		printk("\n");
 	}
@@ -1262,7 +1262,7 @@ skip_hashing:
 	if (rp)
 		*rp = rt;
 	else
-		skb_dst_set(skb, &rt->u.dst);
+		skb_dst_set(skb, &rt->dst);
 	return 0;
 }
 
@@ -1334,11 +1334,11 @@ static void rt_del(unsigned hash, struct rtable *rt)
 	ip_rt_put(rt);
 	while ((aux = *rthp) != NULL) {
 		if (aux == rt || rt_is_expired(aux)) {
-			*rthp = aux->u.dst.rt_next;
+			*rthp = aux->dst.rt_next;
 			rt_free(aux);
 			continue;
 		}
-		rthp = &aux->u.dst.rt_next;
+		rthp = &aux->dst.rt_next;
 	}
 	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
@@ -1392,19 +1392,19 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
 				    rt_is_expired(rth) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net)) {
-					rthp = &rth->u.dst.rt_next;
+				    !net_eq(dev_net(rth->dst.dev), net)) {
+					rthp = &rth->dst.rt_next;
 					continue;
 				}
 
 				if (rth->rt_dst != daddr ||
 				    rth->rt_src != saddr ||
-				    rth->u.dst.error ||
+				    rth->dst.error ||
 				    rth->rt_gateway != old_gw ||
-				    rth->u.dst.dev != dev)
+				    rth->dst.dev != dev)
 					break;
 
-				dst_hold(&rth->u.dst);
+				dst_hold(&rth->dst);
 
 				rt = dst_alloc(&ipv4_dst_ops);
 				if (rt == NULL) {
@@ -1414,20 +1414,20 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 
 				/* Copy all the information. */
 				*rt = *rth;
-				rt->u.dst.__use		= 1;
-				atomic_set(&rt->u.dst.__refcnt, 1);
-				rt->u.dst.child		= NULL;
-				if (rt->u.dst.dev)
-					dev_hold(rt->u.dst.dev);
+				rt->dst.__use		= 1;
+				atomic_set(&rt->dst.__refcnt, 1);
+				rt->dst.child		= NULL;
+				if (rt->dst.dev)
+					dev_hold(rt->dst.dev);
 				if (rt->idev)
 					in_dev_hold(rt->idev);
-				rt->u.dst.obsolete	= -1;
-				rt->u.dst.lastuse	= jiffies;
-				rt->u.dst.path		= &rt->u.dst;
-				rt->u.dst.neighbour	= NULL;
-				rt->u.dst.hh		= NULL;
+				rt->dst.obsolete	= -1;
+				rt->dst.lastuse	= jiffies;
+				rt->dst.path		= &rt->dst;
+				rt->dst.neighbour	= NULL;
+				rt->dst.hh		= NULL;
 #ifdef CONFIG_XFRM
-				rt->u.dst.xfrm		= NULL;
+				rt->dst.xfrm		= NULL;
 #endif
 				rt->rt_genid		= rt_genid(net);
 				rt->rt_flags		|= RTCF_REDIRECTED;
@@ -1436,23 +1436,23 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 				rt->rt_gateway		= new_gw;
 
 				/* Redirect received -> path was valid */
-				dst_confirm(&rth->u.dst);
+				dst_confirm(&rth->dst);
 
 				if (rt->peer)
 					atomic_inc(&rt->peer->refcnt);
 
-				if (arp_bind_neighbour(&rt->u.dst) ||
-				    !(rt->u.dst.neighbour->nud_state &
+				if (arp_bind_neighbour(&rt->dst) ||
+				    !(rt->dst.neighbour->nud_state &
 					    NUD_VALID)) {
-					if (rt->u.dst.neighbour)
-						neigh_event_send(rt->u.dst.neighbour, NULL);
+					if (rt->dst.neighbour)
+						neigh_event_send(rt->dst.neighbour, NULL);
 					ip_rt_put(rth);
 					rt_drop(rt);
 					goto do_next;
 				}
 
-				netevent.old = &rth->u.dst;
-				netevent.new = &rt->u.dst;
+				netevent.old = &rth->dst;
+				netevent.new = &rt->dst;
 				call_netevent_notifiers(NETEVENT_REDIRECT,
 							&netevent);
 
@@ -1488,8 +1488,8 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
 			ip_rt_put(rt);
 			ret = NULL;
 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
-			   (rt->u.dst.expires &&
-			    time_after_eq(jiffies, rt->u.dst.expires))) {
+			   (rt->dst.expires &&
+			    time_after_eq(jiffies, rt->dst.expires))) {
 			unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
 						rt->fl.oif,
 						rt_genid(dev_net(dst->dev)));
@@ -1527,7 +1527,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	int log_martians;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
 		rcu_read_unlock();
 		return;
@@ -1538,30 +1538,30 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 	/* No redirected packets during ip_rt_redirect_silence;
 	 * reset the algorithm.
 	 */
-	if (time_after(jiffies, rt->u.dst.rate_last + ip_rt_redirect_silence))
-		rt->u.dst.rate_tokens = 0;
+	if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence))
+		rt->dst.rate_tokens = 0;
 
 	/* Too many ignored redirects; do not send anything
-	 * set u.dst.rate_last to the last seen redirected packet.
+	 * set dst.rate_last to the last seen redirected packet.
 	 */
-	if (rt->u.dst.rate_tokens >= ip_rt_redirect_number) {
-		rt->u.dst.rate_last = jiffies;
+	if (rt->dst.rate_tokens >= ip_rt_redirect_number) {
+		rt->dst.rate_last = jiffies;
 		return;
 	}
 
 	/* Check for load limit; set rate_last to the latest sent
 	 * redirect.
 	 */
-	if (rt->u.dst.rate_tokens == 0 ||
+	if (rt->dst.rate_tokens == 0 ||
 	    time_after(jiffies,
-		       (rt->u.dst.rate_last +
-			(ip_rt_redirect_load << rt->u.dst.rate_tokens)))) {
+		       (rt->dst.rate_last +
+			(ip_rt_redirect_load << rt->dst.rate_tokens)))) {
 		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
-		rt->u.dst.rate_last = jiffies;
-		++rt->u.dst.rate_tokens;
+		rt->dst.rate_last = jiffies;
+		++rt->dst.rate_tokens;
 #ifdef CONFIG_IP_ROUTE_VERBOSE
 		if (log_martians &&
-		    rt->u.dst.rate_tokens == ip_rt_redirect_number &&
+		    rt->dst.rate_tokens == ip_rt_redirect_number &&
 		    net_ratelimit())
 			printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n",
 				&rt->rt_src, rt->rt_iif,
@@ -1576,7 +1576,7 @@ static int ip_error(struct sk_buff *skb)
 	unsigned long now;
 	int code;
 
-	switch (rt->u.dst.error) {
+	switch (rt->dst.error) {
 		case EINVAL:
 		default:
 			goto out;
@@ -1585,7 +1585,7 @@ static int ip_error(struct sk_buff *skb)
 			break;
 		case ENETUNREACH:
 			code = ICMP_NET_UNREACH;
-			IP_INC_STATS_BH(dev_net(rt->u.dst.dev),
+			IP_INC_STATS_BH(dev_net(rt->dst.dev),
 					IPSTATS_MIB_INNOROUTES);
 			break;
 		case EACCES:
@@ -1594,12 +1594,12 @@ static int ip_error(struct sk_buff *skb)
 	}
 
 	now = jiffies;
-	rt->u.dst.rate_tokens += now - rt->u.dst.rate_last;
-	if (rt->u.dst.rate_tokens > ip_rt_error_burst)
-		rt->u.dst.rate_tokens = ip_rt_error_burst;
-	rt->u.dst.rate_last = now;
-	if (rt->u.dst.rate_tokens >= ip_rt_error_cost) {
-		rt->u.dst.rate_tokens -= ip_rt_error_cost;
+	rt->dst.rate_tokens += now - rt->dst.rate_last;
+	if (rt->dst.rate_tokens > ip_rt_error_burst)
+		rt->dst.rate_tokens = ip_rt_error_burst;
+	rt->dst.rate_last = now;
+	if (rt->dst.rate_tokens >= ip_rt_error_cost) {
+		rt->dst.rate_tokens -= ip_rt_error_cost;
 		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 	}
 
@@ -1644,7 +1644,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 			rcu_read_lock();
 			for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-			     rth = rcu_dereference(rth->u.dst.rt_next)) {
+			     rth = rcu_dereference(rth->dst.rt_next)) {
 				unsigned short mtu = new_mtu;
 
 				if (rth->fl.fl4_dst != daddr ||
@@ -1653,8 +1653,8 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 				    rth->rt_src != iph->saddr ||
 				    rth->fl.oif != ikeys[k] ||
 				    rth->fl.iif != 0 ||
-				    dst_metric_locked(&rth->u.dst, RTAX_MTU) ||
-				    !net_eq(dev_net(rth->u.dst.dev), net) ||
+				    dst_metric_locked(&rth->dst, RTAX_MTU) ||
+				    !net_eq(dev_net(rth->dst.dev), net) ||
 				    rt_is_expired(rth))
 					continue;
 
@@ -1662,22 +1662,22 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
 
 					/* BSD 4.2 compatibility hack :-( */
 					if (mtu == 0 &&
-					    old_mtu >= dst_mtu(&rth->u.dst) &&
+					    old_mtu >= dst_mtu(&rth->dst) &&
 					    old_mtu >= 68 + (iph->ihl << 2))
 						old_mtu -= iph->ihl << 2;
 
 					mtu = guess_mtu(old_mtu);
 				}
-				if (mtu <= dst_mtu(&rth->u.dst)) {
-					if (mtu < dst_mtu(&rth->u.dst)) {
-						dst_confirm(&rth->u.dst);
+				if (mtu <= dst_mtu(&rth->dst)) {
+					if (mtu < dst_mtu(&rth->dst)) {
+						dst_confirm(&rth->dst);
 						if (mtu < ip_rt_min_pmtu) {
 							mtu = ip_rt_min_pmtu;
-							rth->u.dst.metrics[RTAX_LOCK-1] |=
+							rth->dst.metrics[RTAX_LOCK-1] |=
 								(1 << RTAX_MTU);
 						}
-						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
-						dst_set_expires(&rth->u.dst,
+						rth->dst.metrics[RTAX_MTU-1] = mtu;
+						dst_set_expires(&rth->dst,
 							ip_rt_mtu_expires);
 					}
 					est_mtu = mtu;
@@ -1750,7 +1750,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 	if (rt)
-		dst_set_expires(&rt->u.dst, 0);
+		dst_set_expires(&rt->dst, 0);
 }
 
 static int ip_rt_bug(struct sk_buff *skb)
@@ -1778,11 +1778,11 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(dev_net(rt->u.dst.dev), &rt->fl, &res) == 0) {
+	else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
 		src = FIB_RES_PREFSRC(res);
 		fib_res_put(&res);
 	} else
-		src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+		src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
 					RT_SCOPE_UNIVERSE);
 	memcpy(addr, &src, 4);
 }
@@ -1790,10 +1790,10 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 #ifdef CONFIG_NET_CLS_ROUTE
 static void set_class_tag(struct rtable *rt, u32 tag)
 {
-	if (!(rt->u.dst.tclassid & 0xFFFF))
-		rt->u.dst.tclassid |= tag & 0xFFFF;
-	if (!(rt->u.dst.tclassid & 0xFFFF0000))
-		rt->u.dst.tclassid |= tag & 0xFFFF0000;
+	if (!(rt->dst.tclassid & 0xFFFF))
+		rt->dst.tclassid |= tag & 0xFFFF;
+	if (!(rt->dst.tclassid & 0xFFFF0000))
+		rt->dst.tclassid |= tag & 0xFFFF0000;
 }
 #endif
 
@@ -1805,30 +1805,30 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 		if (FIB_RES_GW(*res) &&
 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
 			rt->rt_gateway = FIB_RES_GW(*res);
-		memcpy(rt->u.dst.metrics, fi->fib_metrics,
-		       sizeof(rt->u.dst.metrics));
+		memcpy(rt->dst.metrics, fi->fib_metrics,
+		       sizeof(rt->dst.metrics));
 		if (fi->fib_mtu == 0) {
-			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
-			if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+			rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu;
+			if (dst_metric_locked(&rt->dst, RTAX_MTU) &&
 			    rt->rt_gateway != rt->rt_dst &&
-			    rt->u.dst.dev->mtu > 576)
-				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+			    rt->dst.dev->mtu > 576)
+				rt->dst.metrics[RTAX_MTU-1] = 576;
 		}
 #ifdef CONFIG_NET_CLS_ROUTE
-		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+		rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
 #endif
 	} else
-		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
-
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
-	if (dst_mtu(&rt->u.dst) > IP_MAX_MTU)
-		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+		rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu;
+
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+	if (dst_mtu(&rt->dst) > IP_MAX_MTU)
+		rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0)
+		rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40,
 				       ip_rt_min_advmss);
-	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+	if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40)
+		rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
 
 #ifdef CONFIG_NET_CLS_ROUTE
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1873,13 +1873,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output = ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output = ip_rt_bug;
+	rth->dst.obsolete = -1;
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -1887,13 +1887,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= init_net.loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= init_net.loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif	= 0;
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
@@ -1901,13 +1901,13 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rth->rt_flags	= RTCF_MULTICAST;
 	rth->rt_type	= RTN_MULTICAST;
 	if (our) {
-		rth->u.dst.input= ip_local_deliver;
+		rth->dst.input= ip_local_deliver;
 		rth->rt_flags |= RTCF_LOCAL;
 	}
 
 #ifdef CONFIG_IP_MROUTE
 	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
-		rth->u.dst.input = ip_mr_input;
+		rth->dst.input = ip_mr_input;
 #endif
 	RT_CACHE_STAT_INC(in_slow_mc);
 
@@ -2016,12 +2016,12 @@ static int __mkroute_input(struct sk_buff *skb,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2031,16 +2031,16 @@ static int __mkroute_input(struct sk_buff *skb,
 	rth->rt_gateway	= daddr;
 	rth->rt_iif 	=
 		rth->fl.iif	= in_dev->dev->ifindex;
-	rth->u.dst.dev	= (out_dev)->dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= (out_dev)->dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->fl.oif 	= 0;
 	rth->rt_spec_dst= spec_dst;
 
-	rth->u.dst.obsolete = -1;
-	rth->u.dst.input = ip_forward;
-	rth->u.dst.output = ip_output;
-	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));
+	rth->dst.obsolete = -1;
+	rth->dst.input = ip_forward;
+	rth->dst.output = ip_output;
+	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
 
 	rt_set_nexthop(rth, res, itag);
 
@@ -2074,7 +2074,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
 
 	/* put it into the cache */
 	hash = rt_hash(daddr, saddr, fl->iif,
-		       rt_genid(dev_net(rth->u.dst.dev)));
+		       rt_genid(dev_net(rth->dst.dev)));
 	return rt_intern_hash(hash, rth, NULL, skb, fl->iif);
 }
 
@@ -2197,14 +2197,14 @@ local_input:
 	if (!rth)
 		goto e_nobufs;
 
-	rth->u.dst.output= ip_rt_bug;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output= ip_rt_bug;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(net);
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 	rth->fl.fl4_dst	= daddr;
 	rth->rt_dst	= daddr;
 	rth->fl.fl4_tos	= tos;
@@ -2212,20 +2212,20 @@ local_input:
 	rth->fl.fl4_src	= saddr;
 	rth->rt_src	= saddr;
 #ifdef CONFIG_NET_CLS_ROUTE
-	rth->u.dst.tclassid = itag;
+	rth->dst.tclassid = itag;
 #endif
 	rth->rt_iif	=
 	rth->fl.iif	= dev->ifindex;
-	rth->u.dst.dev	= net->loopback_dev;
-	dev_hold(rth->u.dst.dev);
-	rth->idev	= in_dev_get(rth->u.dst.dev);
+	rth->dst.dev	= net->loopback_dev;
+	dev_hold(rth->dst.dev);
+	rth->idev	= in_dev_get(rth->dst.dev);
 	rth->rt_gateway	= daddr;
 	rth->rt_spec_dst= spec_dst;
-	rth->u.dst.input= ip_local_deliver;
+	rth->dst.input= ip_local_deliver;
 	rth->rt_flags 	= flags|RTCF_LOCAL;
 	if (res.type == RTN_UNREACHABLE) {
-		rth->u.dst.input= ip_error;
-		rth->u.dst.error= -err;
+		rth->dst.input= ip_error;
+		rth->dst.error= -err;
 		rth->rt_flags 	&= ~RTCF_LOCAL;
 	}
 	rth->rt_type	= res.type;
@@ -2291,21 +2291,21 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	hash = rt_hash(daddr, saddr, iif, rt_genid(net));
 
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
-	     rth = rcu_dereference(rth->u.dst.rt_next)) {
+	     rth = rcu_dereference(rth->dst.rt_next)) {
 		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
 		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
 		     (rth->fl.iif ^ iif) |
 		     rth->fl.oif |
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
 		    rth->fl.mark == skb->mark &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
 			if (noref) {
-				dst_use_noref(&rth->u.dst, jiffies);
-				skb_dst_set_noref(skb, &rth->u.dst);
+				dst_use_noref(&rth->dst, jiffies);
+				skb_dst_set_noref(skb, &rth->dst);
 			} else {
-				dst_use(&rth->u.dst, jiffies);
-				skb_dst_set(skb, &rth->u.dst);
+				dst_use(&rth->dst, jiffies);
+				skb_dst_set(skb, &rth->dst);
 			}
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
@@ -2412,12 +2412,12 @@ static int __mkroute_output(struct rtable **result,
 		goto cleanup;
 	}
 
-	atomic_set(&rth->u.dst.__refcnt, 1);
-	rth->u.dst.flags= DST_HOST;
+	atomic_set(&rth->dst.__refcnt, 1);
+	rth->dst.flags= DST_HOST;
 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
-		rth->u.dst.flags |= DST_NOXFRM;
+		rth->dst.flags |= DST_NOXFRM;
 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
-		rth->u.dst.flags |= DST_NOPOLICY;
+		rth->dst.flags |= DST_NOPOLICY;
 
 	rth->fl.fl4_dst	= oldflp->fl4_dst;
 	rth->fl.fl4_tos	= tos;
@@ -2429,35 +2429,35 @@ static int __mkroute_output(struct rtable **result,
 	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
 	/* get references to the devices that are to be hold by the routing
 	   cache entry */
-	rth->u.dst.dev	= dev_out;
+	rth->dst.dev	= dev_out;
 	dev_hold(dev_out);
 	rth->idev	= in_dev_get(dev_out);
 	rth->rt_gateway = fl->fl4_dst;
 	rth->rt_spec_dst= fl->fl4_src;
 
-	rth->u.dst.output=ip_output;
-	rth->u.dst.obsolete = -1;
+	rth->dst.output=ip_output;
+	rth->dst.obsolete = -1;
 	rth->rt_genid = rt_genid(dev_net(dev_out));
 
 	RT_CACHE_STAT_INC(out_slow_tot);
 
 	if (flags & RTCF_LOCAL) {
-		rth->u.dst.input = ip_local_deliver;
+		rth->dst.input = ip_local_deliver;
 		rth->rt_spec_dst = fl->fl4_dst;
 	}
 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
 		rth->rt_spec_dst = fl->fl4_src;
 		if (flags & RTCF_LOCAL &&
 		    !(dev_out->flags & IFF_LOOPBACK)) {
-			rth->u.dst.output = ip_mc_output;
+			rth->dst.output = ip_mc_output;
 			RT_CACHE_STAT_INC(out_slow_mc);
 		}
 #ifdef CONFIG_IP_MROUTE
 		if (res->type == RTN_MULTICAST) {
 			if (IN_DEV_MFORWARD(in_dev) &&
 			    !ipv4_is_local_multicast(oldflp->fl4_dst)) {
-				rth->u.dst.input = ip_mr_input;
-				rth->u.dst.output = ip_mc_output;
+				rth->dst.input = ip_mr_input;
+				rth->dst.output = ip_mc_output;
 			}
 		}
 #endif
@@ -2712,7 +2712,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 
 	rcu_read_lock_bh();
 	for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;
-		rth = rcu_dereference_bh(rth->u.dst.rt_next)) {
+		rth = rcu_dereference_bh(rth->dst.rt_next)) {
 		if (rth->fl.fl4_dst == flp->fl4_dst &&
 		    rth->fl.fl4_src == flp->fl4_src &&
 		    rth->fl.iif == 0 &&
@@ -2720,9 +2720,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 		    rth->fl.mark == flp->mark &&
 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
 			    (IPTOS_RT_MASK | RTO_ONLINK)) &&
-		    net_eq(dev_net(rth->u.dst.dev), net) &&
+		    net_eq(dev_net(rth->dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			dst_use(&rth->u.dst, jiffies);
+			dst_use(&rth->dst, jiffies);
 			RT_CACHE_STAT_INC(out_hit);
 			rcu_read_unlock_bh();
 			*rp = rth;
@@ -2759,15 +2759,15 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_alloc(&ipv4_dst_blackhole_ops);
 
 	if (rt) {
-		struct dst_entry *new = &rt->u.dst;
+		struct dst_entry *new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
 
-		new->dev = ort->u.dst.dev;
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 
@@ -2791,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 		dst_free(new);
 	}
 
-	dst_release(&(*rp)->u.dst);
+	dst_release(&(*rp)->dst);
 	*rp = rt;
 	return (rt ? 0 : -ENOMEM);
 }
@@ -2861,11 +2861,11 @@ static int rt_fill_info(struct net *net,
 		r->rtm_src_len = 32;
 		NLA_PUT_BE32(skb, RTA_SRC, rt->fl.fl4_src);
 	}
-	if (rt->u.dst.dev)
-		NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex);
+	if (rt->dst.dev)
+		NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
 #ifdef CONFIG_NET_CLS_ROUTE
-	if (rt->u.dst.tclassid)
-		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
+	if (rt->dst.tclassid)
+		NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
 	if (rt->fl.iif)
 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
@@ -2875,11 +2875,11 @@ static int rt_fill_info(struct net *net,
 	if (rt->rt_dst != rt->rt_gateway)
 		NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway);
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	error = rt->u.dst.error;
-	expires = rt->u.dst.expires ? rt->u.dst.expires - jiffies : 0;
+	error = rt->dst.error;
+	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
@@ -2911,7 +2911,7 @@ static int rt_fill_info(struct net *net,
 			NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif);
 	}
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, id, ts, tsage,
+	if (rtnl_put_cacheinfo(skb, &rt->dst, id, ts, tsage,
 			       expires, error) < 0)
 		goto nla_put_failure;
 
@@ -2976,8 +2976,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		local_bh_enable();
 
 		rt = skb_rtable(skb);
-		if (err == 0 && rt->u.dst.error)
-			err = -rt->u.dst.error;
+		if (err == 0 && rt->dst.error)
+			err = -rt->dst.error;
 	} else {
 		struct flowi fl = {
 			.nl_u = {
@@ -2995,7 +2995,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	if (err)
 		goto errout_free;
 
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
 
@@ -3031,12 +3031,12 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 			continue;
 		rcu_read_lock_bh();
 		for (rt = rcu_dereference_bh(rt_hash_table[h].chain), idx = 0; rt;
-		     rt = rcu_dereference_bh(rt->u.dst.rt_next), idx++) {
-			if (!net_eq(dev_net(rt->u.dst.dev), net) || idx < s_idx)
+		     rt = rcu_dereference_bh(rt->dst.rt_next), idx++) {
+			if (!net_eq(dev_net(rt->dst.dev), net) || idx < s_idx)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb_dst_set_noref(skb, &rt->u.dst);
+			skb_dst_set_noref(skb, &rt->dst);
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 5c48124332de..02bef6aa8b30 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -354,15 +354,15 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	}
 
 	/* Try to redo what tcp_v4_send_synack did. */
-	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->u.dst, RTAX_WINDOW);
+	req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
 
 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
 				  &req->rcv_wnd, &req->window_clamp,
 				  ireq->wscale_ok, &rcv_wscale,
-				  dst_metric(&rt->u.dst, RTAX_INITRWND));
+				  dst_metric(&rt->dst, RTAX_INITRWND));
 
 	ireq->rcv_wscale  = rcv_wscale;
 
-	ret = get_cookie_sock(sk, skb, req, &rt->u.dst);
+	ret = get_cookie_sock(sk, skb, req, &rt->dst);
 out:	return ret;
 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f976af27bf0..7f9515c0379f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -237,7 +237,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
 	/* OK, now commit destination to socket.  */
 	sk->sk_gso_type = SKB_GSO_TCPV4;
-	sk_setup_caps(sk, &rt->u.dst);
+	sk_setup_caps(sk, &rt->dst);
 
 	if (!tp->write_seq)
 		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index eec4ff456e33..32e0bef60d0a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -914,7 +914,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		    !sock_flag(sk, SOCK_BROADCAST))
 			goto out;
 		if (connected)
-			sk_dst_set(sk, dst_clone(&rt->u.dst));
+			sk_dst_set(sk, dst_clone(&rt->dst));
 	}
 
 	if (msg->msg_flags&MSG_CONFIRM)
@@ -978,7 +978,7 @@ out:
 	return err;
 
 do_confirm:
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 	if (!(msg->msg_flags&MSG_PROBE) || len)
 		goto back_from_confirm;
 	err = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1705476670ef..349327092c9e 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -37,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
 		fl.fl4_src = saddr->a4;
 
 	err = __ip_route_output_key(net, &rt, &fl);
-	dst = &rt->u.dst;
+	dst = &rt->dst;
 	if (err)
 		dst = ERR_PTR(err);
 	return dst;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1a698df5706..b97bb1f30808 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -557,7 +557,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 		pr_warning("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
-	dst_release(&ifp->rt->u.dst);
+	dst_release(&ifp->rt->dst);
 
 	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
 }
@@ -823,7 +823,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				rt->rt6i_flags |= RTF_EXPIRES;
 			}
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 
 out:
@@ -1863,7 +1863,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 					      dev, expires, flags);
 		}
 		if (rt)
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 	}
 
 	/* Try to figure out our local address for this prefix */
@@ -4093,11 +4093,11 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		if (ifp->idev->cnf.forwarding)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
-		dst_hold(&ifp->rt->u.dst);
+		dst_hold(&ifp->rt->dst);
 
 		if (ifp->state == INET6_IFADDR_STATE_DEAD &&
 		    ip6_del_rt(ifp->rt))
-			dst_free(&ifp->rt->u.dst);
+			dst_free(&ifp->rt->dst);
 		break;
 	}
 }
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index f058fbd808c8..0e5e943446f0 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -84,7 +84,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		} else if (ishost) {
 			err = -EADDRNOTAVAIL;
 			goto error;
@@ -244,7 +244,7 @@ static void aca_put(struct ifacaddr6 *ac)
 {
 	if (atomic_dec_and_test(&ac->aca_refcnt)) {
 		in6_dev_put(ac->aca_idev);
-		dst_release(&ac->aca_rt->u.dst);
+		dst_release(&ac->aca_rt->dst);
 		kfree(ac);
 	}
 }
@@ -350,7 +350,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
 	write_unlock_bh(&idev->lock);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	dst_hold(&aca->aca_rt->u.dst);
+	dst_hold(&aca->aca_rt->dst);
 	ip6_del_rt(aca->aca_rt);
 
 	aca_put(aca);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8e44f8f9c188..b1108ede18e1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -43,8 +43,8 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
 	if (arg.result)
 		return arg.result;
 
-	dst_hold(&net->ipv6.ip6_null_entry->u.dst);
-	return &net->ipv6.ip6_null_entry->u.dst;
+	dst_hold(&net->ipv6.ip6_null_entry->dst);
+	return &net->ipv6.ip6_null_entry->dst;
 }
 
 static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
@@ -86,7 +86,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			struct in6_addr saddr;
 
 			if (ipv6_dev_get_saddr(net,
-					       ip6_dst_idev(&rt->u.dst)->dev,
+					       ip6_dst_idev(&rt->dst)->dev,
 					       &flp->fl6_dst,
 					       rt6_flags2srcprefs(flags),
 					       &saddr))
@@ -99,12 +99,12 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 		goto out;
 	}
 again:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = NULL;
 	goto out;
 
 discard_pkt:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 out:
 	arg->result = rt;
 	return rt == NULL ? -EAGAIN : 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92a122b7795d..b6a585909d35 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -165,7 +165,7 @@ static __inline__ void node_free(struct fib6_node * fn)
 static __inline__ void rt6_release(struct rt6_info *rt)
 {
 	if (atomic_dec_and_test(&rt->rt6i_ref))
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
@@ -278,7 +278,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)
 	int res;
 	struct rt6_info *rt;
 
-	for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = rt6_dump_route(rt, w->args);
 		if (res < 0) {
 			/* Frame is full, suspend walking */
@@ -619,7 +619,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 
 	ins = &fn->leaf;
 
-	for (iter = fn->leaf; iter; iter=iter->u.dst.rt6_next) {
+	for (iter = fn->leaf; iter; iter=iter->dst.rt6_next) {
 		/*
 		 *	Search for duplicates
 		 */
@@ -647,7 +647,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		if (iter->rt6i_metric > rt->rt6i_metric)
 			break;
 
-		ins = &iter->u.dst.rt6_next;
+		ins = &iter->dst.rt6_next;
 	}
 
 	/* Reset round-robin state, if necessary */
@@ -658,7 +658,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	 *	insert node
 	 */
 
-	rt->u.dst.rt6_next = iter;
+	rt->dst.rt6_next = iter;
 	*ins = rt;
 	rt->rt6i_node = fn;
 	atomic_inc(&rt->rt6i_ref);
@@ -799,7 +799,7 @@ out:
 			atomic_inc(&pn->leaf->rt6i_ref);
 		}
 #endif
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	}
 	return err;
 
@@ -810,7 +810,7 @@ out:
 st_failure:
 	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
 		fib6_repair_tree(info->nl_net, fn);
-	dst_free(&rt->u.dst);
+	dst_free(&rt->dst);
 	return err;
 #endif
 }
@@ -1108,7 +1108,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	RT6_TRACE("fib6_del_route\n");
 
 	/* Unlink it */
-	*rtp = rt->u.dst.rt6_next;
+	*rtp = rt->dst.rt6_next;
 	rt->rt6i_node = NULL;
 	net->ipv6.rt6_stats->fib_rt_entries--;
 	net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1122,14 +1122,14 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	FOR_WALKERS(w) {
 		if (w->state == FWS_C && w->leaf == rt) {
 			RT6_TRACE("walker %p adjusted by delroute\n", w);
-			w->leaf = rt->u.dst.rt6_next;
+			w->leaf = rt->dst.rt6_next;
 			if (w->leaf == NULL)
 				w->state = FWS_U;
 		}
 	}
 	read_unlock(&fib6_walker_lock);
 
-	rt->u.dst.rt6_next = NULL;
+	rt->dst.rt6_next = NULL;
 
 	/* If it was last route, expunge its radix tree node */
 	if (fn->leaf == NULL) {
@@ -1168,7 +1168,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	struct rt6_info **rtp;
 
 #if RT6_DEBUG >= 2
-	if (rt->u.dst.obsolete>0) {
+	if (rt->dst.obsolete>0) {
 		WARN_ON(fn != NULL);
 		return -ENOENT;
 	}
@@ -1195,7 +1195,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
 	 *	Walk the leaf entries looking for ourself
 	 */
 
-	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.dst.rt6_next) {
+	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
 		if (*rtp == rt) {
 			fib6_del_route(fn, rtp, info);
 			return 0;
@@ -1334,7 +1334,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
 		.nl_net = c->net,
 	};
 
-	for (rt = w->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
 		res = c->func(rt, c->arg);
 		if (res < 0) {
 			w->leaf = rt;
@@ -1448,8 +1448,8 @@ static int fib6_age(struct rt6_info *rt, void *arg)
 		}
 		gc_args.more++;
 	} else if (rt->rt6i_flags & RTF_CACHE) {
-		if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
-		    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
+		if (atomic_read(&rt->dst.__refcnt) == 0 &&
+		    time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
 			RT6_TRACE("aging clone %p\n", rt);
 			return -1;
 		} else if ((rt->rt6i_flags & RTF_GATEWAY) &&
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89425af0684c..d40b330c0ee6 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -698,7 +698,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		ipv6_hdr(skb)->payload_len = htons(first_len -
 						   sizeof(struct ipv6hdr));
 
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 
 		for (;;) {
 			/* Prepare header of the next frame,
@@ -726,7 +726,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 			err = output(skb);
 			if(!err)
-				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 					      IPSTATS_MIB_FRAGCREATES);
 
 			if (err || !frag)
@@ -740,9 +740,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 		kfree(tmp_hdr);
 
 		if (err == 0) {
-			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 				      IPSTATS_MIB_FRAGOKS);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			return 0;
 		}
 
@@ -752,9 +752,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 			frag = skb;
 		}
 
-		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
+		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
 			      IPSTATS_MIB_FRAGFAILS);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return err;
 	}
 
@@ -785,7 +785,7 @@ slow_path:
 		 *	Allocate buffer.
 		 */
 
-		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
 			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 				      IPSTATS_MIB_FRAGFAILS);
@@ -798,7 +798,7 @@ slow_path:
 		 */
 
 		ip6_copy_metadata(frag, skb);
-		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
+		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
 		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 		skb_reset_network_header(frag);
 		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
@@ -1156,24 +1156,24 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 
 			/* need source address above miyazawa*/
 		}
-		dst_hold(&rt->u.dst);
-		inet->cork.dst = &rt->u.dst;
+		dst_hold(&rt->dst);
+		inet->cork.dst = &rt->dst;
 		inet->cork.fl = *fl;
 		np->cork.hop_limit = hlimit;
 		np->cork.tclass = tclass;
 		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
-		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
+		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
 		if (np->frag_size < mtu) {
 			if (np->frag_size)
 				mtu = np->frag_size;
 		}
 		inet->cork.fragsize = mtu;
-		if (dst_allfrag(rt->u.dst.path))
+		if (dst_allfrag(rt->dst.path))
 			inet->cork.flags |= IPCORK_ALLFRAG;
 		inet->cork.length = 0;
 		sk->sk_sndmsg_page = NULL;
 		sk->sk_sndmsg_off = 0;
-		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
+		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
 			    rt->rt6i_nfheader_len;
 		length += exthdrlen;
 		transhdrlen += exthdrlen;
@@ -1186,7 +1186,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		mtu = inet->cork.fragsize;
 	}
 
-	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
 
 	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
 			(opt ? opt->opt_nflen : 0);
@@ -1224,7 +1224,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 		}
 
 		if (proto == IPPROTO_UDP &&
-		    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+		    (rt->dst.dev->features & NETIF_F_UFO)) {
 
 			err = ip6_ufo_append_data(sk, getfrag, from, length,
 						  hh_len, fragheaderlen,
@@ -1270,7 +1270,7 @@ alloc_new_skb:
 
 			fraglen = datalen + fragheaderlen;
 			if ((flags & MSG_MORE) &&
-			    !(rt->u.dst.dev->features&NETIF_F_SG))
+			    !(rt->dst.dev->features&NETIF_F_SG))
 				alloclen = mtu;
 			else
 				alloclen = datalen + fragheaderlen;
@@ -1281,7 +1281,7 @@ alloc_new_skb:
 			 * because we have no idea if we're the last one.
 			 */
 			if (datalen == length + fraggap)
-				alloclen += rt->u.dst.trailer_len;
+				alloclen += rt->dst.trailer_len;
 
 			/*
 			 * We just reserve space for fragment header.
@@ -1358,7 +1358,7 @@ alloc_new_skb:
 		if (copy > length)
 			copy = length;
 
-		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+		if (!(rt->dst.dev->features&NETIF_F_SG)) {
 			unsigned int off;
 
 			off = skb->len;
@@ -1503,7 +1503,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, dst_clone(&rt->dst));
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	if (proto == IPPROTO_ICMPV6) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 8f39893d8081..0fd027f3f47e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -552,7 +552,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (ip_route_output_key(dev_net(skb->dev), &rt, &fl))
 		goto out;
 
-	skb2->dev = rt->u.dst.dev;
+	skb2->dev = rt->dst.dev;
 
 	/* route "incoming" packet */
 	if (rt->rt_flags & RTCF_LOCAL) {
@@ -562,7 +562,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		fl.fl4_src = eiph->saddr;
 		fl.fl4_tos = eiph->tos;
 		if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) ||
-		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+		    rt->dst.dev->type != ARPHRD_TUNNEL) {
 			ip_rt_put(rt);
 			goto out;
 		}
@@ -626,7 +626,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		icmpv6_send(skb2, rel_type, rel_code, rel_info);
 
 		if (rt)
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 
 		kfree_skb(skb2);
 	}
@@ -1135,7 +1135,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 			if (dev->mtu < IPV6_MIN_MTU)
 				dev->mtu = IPV6_MIN_MTU;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 }
 
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 3e36d1538b6e..d1444b95ad7e 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -158,7 +158,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		rt = rt6_lookup(net, addr, NULL, 0, 0);
 		if (rt) {
 			dev = rt->rt6i_dev;
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		}
 	} else
 		dev = dev_get_by_index_rcu(net, ifindex);
@@ -248,7 +248,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
 		if (rt) {
 			dev = rt->rt6i_dev;
 			dev_hold(dev);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 		}
 	} else
 		dev = dev_get_by_index_rcu(net, ifindex);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0abdc242ddb7..1fc46fc60efd 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1229,7 +1229,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 			ND_PRINTK0(KERN_ERR
 				   "ICMPv6 RA: %s() got default router without neighbour.\n",
 				   __func__);
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			in6_dev_put(in6_dev);
 			return;
 		}
@@ -1244,7 +1244,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 	if (ra_msg->icmph.icmp6_hop_limit) {
 		in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
 		if (rt)
-			rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+			rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
 	}
 
 skip_defrtr:
@@ -1363,7 +1363,7 @@ skip_linkparms:
 			in6_dev->cnf.mtu6 = mtu;
 
 			if (rt)
-				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+				rt->dst.metrics[RTAX_MTU-1] = mtu;
 
 			rt6_mtu_change(skb->dev, mtu);
 		}
@@ -1384,7 +1384,7 @@ skip_linkparms:
 	}
 out:
 	if (rt)
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	else if (neigh)
 		neigh_release(neigh);
 	in6_dev_put(in6_dev);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 968b96490729..e677937a07fc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -611,23 +611,23 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 	int err;
 	struct rt6_info *rt = (struct rt6_info *)*dstp;
 
-	if (length > rt->u.dst.dev->mtu) {
-		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
+	if (length > rt->dst.dev->mtu) {
+		ipv6_local_error(sk, EMSGSIZE, fl, rt->dst.dev->mtu);
 		return -EMSGSIZE;
 	}
 	if (flags&MSG_PROBE)
 		goto out;
 
 	skb = sock_alloc_send_skb(sk,
-				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
+				  length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
 				  flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto error;
-	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));
+	skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 	*dstp = NULL;
 
 	skb_put(skb, length);
@@ -643,7 +643,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 
 	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
-		      rt->u.dst.dev, dst_output);
+		      rt->dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 252d76199c41..f7702850d45c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -126,16 +126,14 @@ static struct dst_ops ip6_dst_blackhole_ops = {
 };
 
 static struct rt6_info ip6_null_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -ENETUNREACH,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_discard,
-			.output		= ip6_pkt_discard_out,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -ENETUNREACH,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= ip6_pkt_discard,
+		.output		= ip6_pkt_discard_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -149,16 +147,14 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 
 static struct rt6_info ip6_prohibit_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -EACCES,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= ip6_pkt_prohibit,
-			.output		= ip6_pkt_prohibit_out,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -EACCES,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= ip6_pkt_prohibit,
+		.output		= ip6_pkt_prohibit_out,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -167,16 +163,14 @@ static struct rt6_info ip6_prohibit_entry_template = {
 };
 
 static struct rt6_info ip6_blk_hole_entry_template = {
-	.u = {
-		.dst = {
-			.__refcnt	= ATOMIC_INIT(1),
-			.__use		= 1,
-			.obsolete	= -1,
-			.error		= -EINVAL,
-			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
-			.input		= dst_discard,
-			.output		= dst_discard,
-		}
+	.dst = {
+		.__refcnt	= ATOMIC_INIT(1),
+		.__use		= 1,
+		.obsolete	= -1,
+		.error		= -EINVAL,
+		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+		.input		= dst_discard,
+		.output		= dst_discard,
 	},
 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
 	.rt6i_protocol  = RTPROT_KERNEL,
@@ -249,7 +243,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
 	if (!oif && ipv6_addr_any(saddr))
 		goto out;
 
-	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
+	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
 		struct net_device *dev = sprt->rt6i_dev;
 
 		if (oif) {
@@ -407,10 +401,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 	match = NULL;
 	for (rt = rr_head; rt && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
-	     rt = rt->u.dst.rt6_next)
+	     rt = rt->dst.rt6_next)
 		match = find_match(rt, oif, strict, &mpri, match);
 
 	return match;
@@ -432,7 +426,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 	if (!match &&
 	    (strict & RT6_LOOKUP_F_REACHABLE)) {
-		struct rt6_info *next = rt0->u.dst.rt6_next;
+		struct rt6_info *next = rt0->dst.rt6_next;
 
 		/* no entries matched; do round-robin */
 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -517,7 +511,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 			rt->rt6i_expires = jiffies + HZ * lifetime;
 			rt->rt6i_flags |= RTF_EXPIRES;
 		}
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 	}
 	return 0;
 }
@@ -555,7 +549,7 @@ restart:
 	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_use(&rt->u.dst, jiffies);
+	dst_use(&rt->dst, jiffies);
 	read_unlock_bh(&table->tb6_lock);
 	return rt;
 
@@ -643,7 +637,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
@@ -677,7 +671,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad
 			if (net_ratelimit())
 				printk(KERN_WARNING
 				       "Neighbour table overflow.\n");
-			dst_free(&rt->u.dst);
+			dst_free(&rt->dst);
 			return NULL;
 		}
 		rt->rt6i_nexthop = neigh;
@@ -694,7 +688,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
-		rt->u.dst.flags |= DST_HOST;
+		rt->dst.flags |= DST_HOST;
 		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
 	}
 	return rt;
@@ -726,7 +720,7 @@ restart:
 	    rt->rt6i_flags & RTF_CACHE)
 		goto out;
 
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 
 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
@@ -739,10 +733,10 @@ restart:
 #endif
 	}
 
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	rt = nrt ? : net->ipv6.ip6_null_entry;
 
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	if (nrt) {
 		err = ip6_ins_rt(nrt);
 		if (!err)
@@ -756,7 +750,7 @@ restart:
 	 * Race condition! In the gap, when table->tb6_lock was
 	 * released someone could insert this route.  Relookup.
 	 */
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto relookup;
 
 out:
@@ -764,11 +758,11 @@ out:
 		reachable = 0;
 		goto restart_2;
 	}
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 	read_unlock_bh(&table->tb6_lock);
 out2:
-	rt->u.dst.lastuse = jiffies;
-	rt->u.dst.__use++;
+	rt->dst.lastuse = jiffies;
+	rt->dst.__use++;
 
 	return rt;
 }
@@ -835,15 +829,15 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
 	struct dst_entry *new = NULL;
 
 	if (rt) {
-		new = &rt->u.dst;
+		new = &rt->dst;
 
 		atomic_set(&new->__refcnt, 1);
 		new->__use = 1;
 		new->input = dst_discard;
 		new->output = dst_discard;
 
-		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		new->dev = ort->u.dst.dev;
+		memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		new->dev = ort->dst.dev;
 		if (new->dev)
 			dev_hold(new->dev);
 		rt->rt6i_idev = ort->rt6i_idev;
@@ -912,7 +906,7 @@ static void ip6_link_failure(struct sk_buff *skb)
 	rt = (struct rt6_info *) skb_dst(skb);
 	if (rt) {
 		if (rt->rt6i_flags&RTF_CACHE) {
-			dst_set_expires(&rt->u.dst, 0);
+			dst_set_expires(&rt->dst, 0);
 			rt->rt6i_flags |= RTF_EXPIRES;
 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 			rt->rt6i_node->fn_sernum = -1;
@@ -986,14 +980,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->rt6i_dev	  = dev;
 	rt->rt6i_idev     = idev;
 	rt->rt6i_nexthop  = neigh;
-	atomic_set(&rt->u.dst.__refcnt, 1);
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.output  = ip6_output;
+	atomic_set(&rt->dst.__refcnt, 1);
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.output  = ip6_output;
 
 #if 0	/* there's no chance to use these for ndisc */
-	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
+	rt->dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
 				? DST_HOST
 				: 0;
 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
@@ -1001,14 +995,14 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 #endif
 
 	spin_lock_bh(&icmp6_dst_lock);
-	rt->u.dst.next = icmp6_dst_gc_list;
-	icmp6_dst_gc_list = &rt->u.dst;
+	rt->dst.next = icmp6_dst_gc_list;
+	icmp6_dst_gc_list = &rt->dst;
 	spin_unlock_bh(&icmp6_dst_lock);
 
 	fib6_force_start_gc(net);
 
 out:
-	return &rt->u.dst;
+	return &rt->dst;
 }
 
 int icmp6_dst_gc(void)
@@ -1159,7 +1153,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		goto out;
 	}
 
-	rt->u.dst.obsolete = -1;
+	rt->dst.obsolete = -1;
 	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
 				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
 				0;
@@ -1171,16 +1165,16 @@ int ip6_route_add(struct fib6_config *cfg)
 	addr_type = ipv6_addr_type(&cfg->fc_dst);
 
 	if (addr_type & IPV6_ADDR_MULTICAST)
-		rt->u.dst.input = ip6_mc_input;
+		rt->dst.input = ip6_mc_input;
 	else
-		rt->u.dst.input = ip6_forward;
+		rt->dst.input = ip6_forward;
 
-	rt->u.dst.output = ip6_output;
+	rt->dst.output = ip6_output;
 
 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 	rt->rt6i_dst.plen = cfg->fc_dst_len;
 	if (rt->rt6i_dst.plen == 128)
-	       rt->u.dst.flags = DST_HOST;
+	       rt->dst.flags = DST_HOST;
 
 #ifdef CONFIG_IPV6_SUBTREES
 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
@@ -1208,9 +1202,9 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			}
 		}
-		rt->u.dst.output = ip6_pkt_discard_out;
-		rt->u.dst.input = ip6_pkt_discard;
-		rt->u.dst.error = -ENETUNREACH;
+		rt->dst.output = ip6_pkt_discard_out;
+		rt->dst.input = ip6_pkt_discard;
+		rt->dst.error = -ENETUNREACH;
 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 		goto install_route;
 	}
@@ -1244,7 +1238,7 @@ int ip6_route_add(struct fib6_config *cfg)
 				goto out;
 			if (dev) {
 				if (dev != grt->rt6i_dev) {
-					dst_release(&grt->u.dst);
+					dst_release(&grt->dst);
 					goto out;
 				}
 			} else {
@@ -1255,7 +1249,7 @@ int ip6_route_add(struct fib6_config *cfg)
 			}
 			if (!(grt->rt6i_flags&RTF_GATEWAY))
 				err = 0;
-			dst_release(&grt->u.dst);
+			dst_release(&grt->dst);
 
 			if (err)
 				goto out;
@@ -1294,18 +1288,18 @@ install_route:
 					goto out;
 				}
 
-				rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
+				rt->dst.metrics[type - 1] = nla_get_u32(nla);
 			}
 		}
 	}
 
-	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	if (!dst_mtu(&rt->u.dst))
-		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
-	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.dev = dev;
+	if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
+		rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	if (!dst_mtu(&rt->dst))
+		rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+	if (!dst_metric(&rt->dst, RTAX_ADVMSS))
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.dev = dev;
 	rt->rt6i_idev = idev;
 	rt->rt6i_table = table;
 
@@ -1319,7 +1313,7 @@ out:
 	if (idev)
 		in6_dev_put(idev);
 	if (rt)
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 	return err;
 }
 
@@ -1336,7 +1330,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 	write_lock_bh(&table->tb6_lock);
 
 	err = fib6_del(rt, info);
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 
 	write_unlock_bh(&table->tb6_lock);
 
@@ -1369,7 +1363,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 			 &cfg->fc_src, cfg->fc_src_len);
 
 	if (fn) {
-		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 			if (cfg->fc_ifindex &&
 			    (rt->rt6i_dev == NULL ||
 			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
@@ -1379,7 +1373,7 @@ static int ip6_route_del(struct fib6_config *cfg)
 				continue;
 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 				continue;
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 
 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
@@ -1421,7 +1415,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	read_lock_bh(&table->tb6_lock);
 	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 restart:
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		/*
 		 * Current route is on-link; redirect is always invalid.
 		 *
@@ -1445,7 +1439,7 @@ restart:
 		rt = net->ipv6.ip6_null_entry;
 	BACKTRACK(net, &fl->fl6_src);
 out:
-	dst_hold(&rt->u.dst);
+	dst_hold(&rt->dst);
 
 	read_unlock_bh(&table->tb6_lock);
 
@@ -1513,10 +1507,10 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	 * Look, redirects are sent only in response to data packets,
 	 * so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 
 	/* Duplicate redirect: silently ignore. */
-	if (neigh == rt->u.dst.neighbour)
+	if (neigh == rt->dst.neighbour)
 		goto out;
 
 	nrt = ip6_rt_copy(rt);
@@ -1529,20 +1523,20 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
 	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
 	nrt->rt6i_dst.plen = 128;
-	nrt->u.dst.flags |= DST_HOST;
+	nrt->dst.flags |= DST_HOST;
 
 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 	nrt->rt6i_nexthop = neigh_clone(neigh);
 	/* Reset pmtu, it may be better */
-	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
-	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
-							dst_mtu(&nrt->u.dst));
+	nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
+	nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
+							dst_mtu(&nrt->dst));
 
 	if (ip6_ins_rt(nrt))
 		goto out;
 
-	netevent.old = &rt->u.dst;
-	netevent.new = &nrt->u.dst;
+	netevent.old = &rt->dst;
+	netevent.new = &nrt->dst;
 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
 	if (rt->rt6i_flags&RTF_CACHE) {
@@ -1551,7 +1545,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 	}
 
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }
 
 /*
@@ -1570,7 +1564,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	if (rt == NULL)
 		return;
 
-	if (pmtu >= dst_mtu(&rt->u.dst))
+	if (pmtu >= dst_mtu(&rt->dst))
 		goto out;
 
 	if (pmtu < IPV6_MIN_MTU) {
@@ -1588,7 +1582,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   They are sent only in response to data packets,
 	   so that this nexthop apparently is reachable. --ANK
 	 */
-	dst_confirm(&rt->u.dst);
+	dst_confirm(&rt->dst);
 
 	/* Host route. If it is static, it would be better
 	   not to override it, but add new one, so that
@@ -1596,10 +1590,10 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 	   would return automatically.
 	 */
 	if (rt->rt6i_flags & RTF_CACHE) {
-		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		rt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
-		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+			rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 		goto out;
 	}
@@ -1615,9 +1609,9 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		nrt = rt6_alloc_clone(rt, daddr);
 
 	if (nrt) {
-		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+		nrt->dst.metrics[RTAX_MTU-1] = pmtu;
 		if (allfrag)
-			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
+			nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 
 		/* According to RFC 1981, detecting PMTU increase shouldn't be
 		 * happened within 5 mins, the recommended timer is 10 mins.
@@ -1625,13 +1619,13 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 		 * which is 10 mins. After 10 mins the decreased pmtu is expired
 		 * and detecting PMTU increase will be automatically happened.
 		 */
-		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
+		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
 		ip6_ins_rt(nrt);
 	}
 out:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 }
 
 /*
@@ -1644,18 +1638,18 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
 
 	if (rt) {
-		rt->u.dst.input = ort->u.dst.input;
-		rt->u.dst.output = ort->u.dst.output;
-
-		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
-		rt->u.dst.error = ort->u.dst.error;
-		rt->u.dst.dev = ort->u.dst.dev;
-		if (rt->u.dst.dev)
-			dev_hold(rt->u.dst.dev);
+		rt->dst.input = ort->dst.input;
+		rt->dst.output = ort->dst.output;
+
+		memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
+		rt->dst.error = ort->dst.error;
+		rt->dst.dev = ort->dst.dev;
+		if (rt->dst.dev)
+			dev_hold(rt->dst.dev);
 		rt->rt6i_idev = ort->rt6i_idev;
 		if (rt->rt6i_idev)
 			in6_dev_hold(rt->rt6i_idev);
-		rt->u.dst.lastuse = jiffies;
+		rt->dst.lastuse = jiffies;
 		rt->rt6i_expires = 0;
 
 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
@@ -1689,14 +1683,14 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	if (!fn)
 		goto out;
 
-	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_dev->ifindex != ifindex)
 			continue;
 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 			continue;
 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 			continue;
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 		break;
 	}
 out:
@@ -1744,14 +1738,14 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d
 		return NULL;
 
 	write_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
 		if (dev == rt->rt6i_dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
 			break;
 	}
 	if (rt)
-		dst_hold(&rt->u.dst);
+		dst_hold(&rt->dst);
 	write_unlock_bh(&table->tb6_lock);
 	return rt;
 }
@@ -1790,9 +1784,9 @@ void rt6_purge_dflt_routers(struct net *net)
 
 restart:
 	read_lock_bh(&table->tb6_lock);
-	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
+	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
-			dst_hold(&rt->u.dst);
+			dst_hold(&rt->dst);
 			read_unlock_bh(&table->tb6_lock);
 			ip6_del_rt(rt);
 			goto restart;
@@ -1930,15 +1924,15 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	dev_hold(net->loopback_dev);
 	in6_dev_hold(idev);
 
-	rt->u.dst.flags = DST_HOST;
-	rt->u.dst.input = ip6_input;
-	rt->u.dst.output = ip6_output;
+	rt->dst.flags = DST_HOST;
+	rt->dst.input = ip6_input;
+	rt->dst.output = ip6_output;
 	rt->rt6i_dev = net->loopback_dev;
 	rt->rt6i_idev = idev;
-	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
-	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
-	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
-	rt->u.dst.obsolete = -1;
+	rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+	rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
+	rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
+	rt->dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 	if (anycast)
@@ -1947,7 +1941,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 		rt->rt6i_flags |= RTF_LOCAL;
 	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (IS_ERR(neigh)) {
-		dst_free(&rt->u.dst);
+		dst_free(&rt->dst);
 
 		/* We are casting this because that is the return
 		 * value type.  But an errno encoded pointer is the
@@ -1962,7 +1956,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
-	atomic_set(&rt->u.dst.__refcnt, 1);
+	atomic_set(&rt->dst.__refcnt, 1);
 
 	return rt;
 }
@@ -2033,12 +2027,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 	   PMTU discouvery.
 	 */
 	if (rt->rt6i_dev == arg->dev &&
-	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
-	    (dst_mtu(&rt->u.dst) >= arg->mtu ||
-	     (dst_mtu(&rt->u.dst) < arg->mtu &&
-	      dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
-		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
-		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
+	    !dst_metric_locked(&rt->dst, RTAX_MTU) &&
+	    (dst_mtu(&rt->dst) >= arg->mtu ||
+	     (dst_mtu(&rt->dst) < arg->mtu &&
+	      dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
+		rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
+		rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
 	}
 	return 0;
 }
@@ -2252,20 +2246,20 @@ static int rt6_fill_node(struct net *net,
 #endif
 			NLA_PUT_U32(skb, RTA_IIF, iif);
 	} else if (dst) {
-		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
+		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
 		struct in6_addr saddr_buf;
 		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 				       dst, 0, &saddr_buf) == 0)
 			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
-	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+	if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
 		goto nla_put_failure;
 
-	if (rt->u.dst.neighbour)
-		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+	if (rt->dst.neighbour)
+		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
 
-	if (rt->u.dst.dev)
+	if (rt->dst.dev)
 		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
 
 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
@@ -2277,8 +2271,8 @@ static int rt6_fill_node(struct net *net,
 	else
 		expires = INT_MAX;
 
-	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
-			       expires, rt->u.dst.error) < 0)
+	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
+			       expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
 	return nlmsg_end(skb, nlh);
@@ -2364,7 +2358,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
 	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
@@ -2416,12 +2410,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,
 	struct net *net = dev_net(dev);
 
 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
-		net->ipv6.ip6_null_entry->u.dst.dev = dev;
+		net->ipv6.ip6_null_entry->dst.dev = dev;
 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
+		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
-		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
+		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
 #endif
 	}
@@ -2464,8 +2458,8 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
 		seq_puts(m, "00000000000000000000000000000000");
 	}
 	seq_printf(m, " %08x %08x %08x %08x %8s\n",
-		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
-		   rt->u.dst.__use, rt->rt6i_flags,
+		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
+		   rt->dst.__use, rt->rt6i_flags,
 		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
 	return 0;
 }
@@ -2646,9 +2640,9 @@ static int __net_init ip6_route_net_init(struct net *net)
 					   GFP_KERNEL);
 	if (!net->ipv6.ip6_null_entry)
 		goto out_ip6_dst_ops;
-	net->ipv6.ip6_null_entry->u.dst.path =
+	net->ipv6.ip6_null_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_null_entry;
-	net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2656,18 +2650,18 @@ static int __net_init ip6_route_net_init(struct net *net)
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_prohibit_entry)
 		goto out_ip6_null_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.path =
+	net->ipv6.ip6_prohibit_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
-	net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 
 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
 					       GFP_KERNEL);
 	if (!net->ipv6.ip6_blk_hole_entry)
 		goto out_ip6_prohibit_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.path =
+	net->ipv6.ip6_blk_hole_entry->dst.path =
 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
-	net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
+	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
 #endif
 
 	net->ipv6.sysctl.flush_delay = 0;
@@ -2742,12 +2736,12 @@ int __init ip6_route_init(void)
 	/* Registering of the loopback is done before this portion of code,
 	 * the loopback reference in rt6_info will not be taken, do it
 	 * manually for init_net */
-	init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
-	init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
-	init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
+	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
   #endif
 	ret = fib6_init();
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 702c532ec21e..4699cd3c3118 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -712,7 +712,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 		stats->tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
 	if (tdev == dev) {
 		ip_rt_put(rt);
@@ -721,7 +721,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	}
 
 	if (df) {
-		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
 		if (mtu < 68) {
 			stats->collisions++;
@@ -780,7 +780,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags = 0;
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -829,7 +829,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
 				    .proto = IPPROTO_IPV6 };
 		struct rtable *rt;
 		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-			tdev = rt->u.dst.dev;
+			tdev = rt->dst.dev;
 			ip_rt_put(rt);
 		}
 		dev->flags |= IFF_POINTOPOINT;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 0852512d392c..226a0ae3bcfd 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -348,7 +348,7 @@ static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	sk->sk_state = TCP_ESTABLISHED;
 	inet->inet_id = jiffies;
 
-	sk_dst_set(sk, &rt->u.dst);
+	sk_dst_set(sk, &rt->dst);
 
 	write_lock_bh(&l2tp_ip_lock);
 	hlist_del_init(&sk->sk_bind_node);
@@ -496,9 +496,9 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
 				goto no_route;
 		}
-		sk_setup_caps(sk, &rt->u.dst);
+		sk_setup_caps(sk, &rt->dst);
 	}
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set(skb, dst_clone(&rt->dst));
 
 	/* Queue the packet to IP for output */
 	rc = ip_queue_xmit(skb);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 93c15a107b2c..02b078e11cf3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -90,10 +90,10 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 					     &dest->addr.ip);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
 			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
 				  &dest->addr.ip,
-				  atomic_read(&rt->u.dst.__refcnt), rtos);
+				  atomic_read(&rt->dst.__refcnt), rtos);
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
@@ -148,10 +148,10 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
 					     &dest->addr.in6);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst));
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
 			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
 				  &dest->addr.in6,
-				  atomic_read(&rt->u.dst.__refcnt));
+				  atomic_read(&rt->dst.__refcnt));
 		}
 		spin_unlock(&dest->dst_lock);
 	} else {
@@ -198,7 +198,7 @@ do {							\
 	(skb)->ipvs_property = 1;			\
 	skb_forward_csum(skb);				\
 	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		(rt)->u.dst.dev, dst_output);		\
+		(rt)->dst.dev, dst_output);		\
 } while (0)
 
 
@@ -245,7 +245,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -265,7 +265,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -309,9 +309,9 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -323,13 +323,13 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -376,7 +376,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
@@ -388,12 +388,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct iphdr)))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -452,9 +452,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
@@ -465,12 +465,12 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -498,7 +498,7 @@ tx_error:
 	kfree_skb(skb);
 	return NF_STOLEN;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
@@ -549,9 +549,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
 		goto tx_error_icmp;
 
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 	if (mtu < 68) {
 		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
@@ -601,7 +601,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -615,7 +615,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->daddr		=	rt->rt_dst;
 	iph->saddr		=	rt->rt_src;
 	iph->ttl		=	old_iph->ttl;
-	ip_select_ident(iph, &rt->u.dst, NULL);
+	ip_select_ident(iph, &rt->dst, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -660,12 +660,12 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!rt)
 		goto tx_error_icmp;
 
-	tdev = rt->u.dst.dev;
+	tdev = rt->dst.dev;
 
-	mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr);
+	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
 	/* TODO IPv6: do we need this check in IPv6? */
 	if (mtu < 1280) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
 		goto tx_error;
 	}
@@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -689,7 +689,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		struct sk_buff *new_skb =
 			skb_realloc_headroom(skb, max_headroom);
 		if (!new_skb) {
-			dst_release(&rt->u.dst);
+			dst_release(&rt->dst);
 			kfree_skb(skb);
 			IP_VS_ERR_RL("%s(): no memory\n", __func__);
 			return NF_STOLEN;
@@ -707,7 +707,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/*
 	 *	Push down and install the IPIP header.
@@ -760,7 +760,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
 		ip_rt_put(rt);
@@ -780,7 +780,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -813,10 +813,10 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -827,13 +827,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 */
 	skb = skb_share_check(skb, GFP_ATOMIC);
 	if (unlikely(skb == NULL)) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		return NF_STOLEN;
 	}
 
 	/* drop old route */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
@@ -888,7 +888,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
 		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
@@ -900,12 +900,12 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
@@ -963,9 +963,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error_icmp;
 
 	/* MTU checking */
-	mtu = dst_mtu(&rt->u.dst);
+	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->u.dst);
+		dst_release(&rt->dst);
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
@@ -975,12 +975,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (!skb_make_writable(skb, offset))
 		goto tx_error_put;
 
-	if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
+	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
 	/* drop the old route when skb is not shared */
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
+	skb_dst_set(skb, &rt->dst);
 
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
@@ -1001,7 +1001,7 @@ out:
 	LeaveFunction(10);
 	return rc;
 tx_error_put:
-	dst_release(&rt->u.dst);
+	dst_release(&rt->dst);
 	goto tx_error;
 }
 #endif
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6eaee7c8a337..b969025cf82f 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -734,11 +734,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		if (!afinfo->route((struct dst_entry **)&rt1, &fl1)) {
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (rt1->rt_gateway == rt2->rt_gateway &&
-				    rt1->u.dst.dev  == rt2->u.dst.dev)
+				    rt1->dst.dev  == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
@@ -753,11 +753,11 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 			if (!afinfo->route((struct dst_entry **)&rt2, &fl2)) {
 				if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
 					    sizeof(rt1->rt6i_gateway)) &&
-				    rt1->u.dst.dev == rt2->u.dst.dev)
+				    rt1->dst.dev == rt2->dst.dev)
 					ret = 1;
-				dst_release(&rt2->u.dst);
+				dst_release(&rt2->dst);
 			}
-			dst_release(&rt1->u.dst);
+			dst_release(&rt1->dst);
 		}
 		break;
 	}
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 497b2224536f..aadde018a072 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -61,7 +61,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 		goto out;
 
 	rcu_read_lock();
-	in_dev = __in_dev_get_rcu(rt->u.dst.dev);
+	in_dev = __in_dev_get_rcu(rt->dst.dev);
 	if (in_dev != NULL) {
 		for_primary_ifa(in_dev) {
 			if (ifa->ifa_broadcast == iph->daddr) {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 62ec021fbd50..1841388c770a 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -165,8 +165,8 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 	rcu_read_unlock();
 
 	if (rt != NULL) {
-		mtu = dst_mtu(&rt->u.dst);
-		dst_release(&rt->u.dst);
+		mtu = dst_mtu(&rt->dst);
+		dst_release(&rt->dst);
 	}
 	return mtu;
 }
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 859d9fd429c8..c77a85bbd9eb 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -77,8 +77,8 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 		return false;
 
 	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->u.dst);
-	skb->dev      = rt->u.dst.dev;
+	skb_dst_set(skb, &rt->dst);
+	skb->dev      = rt->dst.dev;
 	skb->protocol = htons(ETH_P_IP);
 	return true;
 }
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index f0f85b0123f7..9f1729bd60de 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -64,8 +64,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 		return;
 	}
 
-	peer->if_mtu = dst_mtu(&rt->u.dst);
-	dst_release(&rt->u.dst);
+	peer->if_mtu = dst_mtu(&rt->dst);
+	dst_release(&rt->dst);
 
 	_leave(" [if_mtu %u]", peer->if_mtu);
 }
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 182749867c72..a0e1a7fdebbf 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -490,7 +490,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			  __func__, &fl.fl4_dst, &fl.fl4_src);
 
 	if (!ip_route_output_key(&init_net, &rt, &fl)) {
-		dst = &rt->u.dst;
+		dst = &rt->dst;
 	}
 
 	/* If there is no association or if a source address is passed, no
@@ -534,7 +534,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
 			fl.fl_ip_sport = laddr->a.v4.sin_port;
 			if (!ip_route_output_key(&init_net, &rt, &fl)) {
-				dst = &rt->u.dst;
+				dst = &rt->dst;
 				goto out_unlock;
 			}
 		}
-- 
cgit v1.2.3-58-ga151


From c7de2cf053420d63bac85133469c965d4b1083e1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 02:09:23 +0000
Subject: pkt_sched: gen_kill_estimator() rcu fixes

gen_kill_estimator() API is incomplete or not well documented, since
caller should make sure an RCU grace period is respected before
freeing stats_lock.

This was partially addressed in commit 5d944c640b4
(gen_estimator: deadlock fix), but same problem exist for all
gen_kill_estimator() users, if lock they use is not already RCU
protected.

A code review shows xt_RATEEST.c, act_api.c, act_police.c have this
problem. Other are ok because they use qdisc lock, already RCU
protected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h              |  2 ++
 include/net/netfilter/xt_rateest.h |  1 +
 net/core/gen_estimator.c           |  1 +
 net/netfilter/xt_RATEEST.c         | 12 +++++++++++-
 net/sched/act_api.c                | 11 ++++++++++-
 net/sched/act_police.c             | 12 +++++++++++-
 6 files changed, 36 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c05fd717c588..bab385f13ac3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -20,6 +20,7 @@ struct tcf_common {
 	struct gnet_stats_queue		tcfc_qstats;
 	struct gnet_stats_rate_est	tcfc_rate_est;
 	spinlock_t			tcfc_lock;
+	struct rcu_head			tcfc_rcu;
 };
 #define tcf_next	common.tcfc_next
 #define tcf_index	common.tcfc_index
@@ -32,6 +33,7 @@ struct tcf_common {
 #define tcf_qstats	common.tcfc_qstats
 #define tcf_rate_est	common.tcfc_rate_est
 #define tcf_lock	common.tcfc_lock
+#define tcf_rcu		common.tcfc_rcu
 
 struct tcf_police {
 	struct tcf_common	common;
diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h
index ddbf37e19616..5e1427795928 100644
--- a/include/net/netfilter/xt_rateest.h
+++ b/include/net/netfilter/xt_rateest.h
@@ -9,6 +9,7 @@ struct xt_rateest {
 	struct gnet_estimator		params;
 	struct gnet_stats_rate_est	rstats;
 	struct gnet_stats_basic_packed	bstats;
+	struct rcu_head			rcu;
 };
 
 extern struct xt_rateest *xt_rateest_lookup(const char *name);
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 785e5276a300..9fbe7f7429b0 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -263,6 +263,7 @@ static void __gen_kill_estimator(struct rcu_head *head)
  *
  * Removes the rate estimator specified by &bstats and &rate_est.
  *
+ * Note : Caller should respect an RCU grace period before freeing stats_lock
  */
 void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 			struct gnet_stats_rate_est *rate_est)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 69c01e10f8af..de079abd5bc8 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -60,13 +60,22 @@ struct xt_rateest *xt_rateest_lookup(const char *name)
 }
 EXPORT_SYMBOL_GPL(xt_rateest_lookup);
 
+static void xt_rateest_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct xt_rateest, rcu));
+}
+
 void xt_rateest_put(struct xt_rateest *est)
 {
 	mutex_lock(&xt_rateest_mutex);
 	if (--est->refcnt == 0) {
 		hlist_del(&est->list);
 		gen_kill_estimator(&est->bstats, &est->rstats);
-		kfree(est);
+		/*
+		 * gen_estimator est_timer() might access est->lock or bstats,
+		 * wait a RCU grace period before freeing 'est'
+		 */
+		call_rcu(&est->rcu, xt_rateest_free_rcu);
 	}
 	mutex_unlock(&xt_rateest_mutex);
 }
@@ -179,6 +188,7 @@ static int __init xt_rateest_tg_init(void)
 static void __exit xt_rateest_tg_fini(void)
 {
 	xt_unregister_target(&xt_rateest_tg_reg);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (xt_rateest_free_rcu) */
 }
 
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 972378f47f3c..23b25f89e7e0 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,6 +26,11 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
+static void tcf_common_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_common, tcfc_rcu));
+}
+
 void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 {
 	unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -38,7 +43,11 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
 			write_unlock_bh(hinfo->lock);
 			gen_kill_estimator(&p->tcfc_bstats,
 					   &p->tcfc_rate_est);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcfc_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcfc_rcu, tcf_common_free_rcu);
 			return;
 		}
 	}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 654f73dff7c1..537a48732e9e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -97,6 +97,11 @@ nla_put_failure:
 	goto done;
 }
 
+static void tcf_police_free_rcu(struct rcu_head *head)
+{
+	kfree(container_of(head, struct tcf_police, tcf_rcu));
+}
+
 static void tcf_police_destroy(struct tcf_police *p)
 {
 	unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -113,7 +118,11 @@ static void tcf_police_destroy(struct tcf_police *p)
 				qdisc_put_rtab(p->tcfp_R_tab);
 			if (p->tcfp_P_tab)
 				qdisc_put_rtab(p->tcfp_P_tab);
-			kfree(p);
+			/*
+			 * gen_estimator est_timer() might access p->tcf_lock
+			 * or bstats, wait a RCU grace period before freeing p
+			 */
+			call_rcu(&p->tcf_rcu, tcf_police_free_rcu);
 			return;
 		}
 	}
@@ -397,6 +406,7 @@ static void __exit
 police_cleanup_module(void)
 {
 	tcf_unregister_action(&act_police_ops);
+	rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
 }
 
 module_init(police_init_module);
-- 
cgit v1.2.3-58-ga151


From 0c78a92fbd655ab990e2799f645707f05f548e2f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Jun 2010 16:33:05 +0000
Subject: econet: fix locking

econet lacks proper locking. It holds econet_lock only when inserting or
deleting an entry in econet_sklist, not during lookups.

- convert econet_lock from rwlock to spinlock

- use econet_lock in ec_listening_socket() lookup

- use appropriate sock_hold() / sock_put() to avoid corruptions.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/econet/af_econet.c | 27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 2a5a8053e000..dc54bd0d083b 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -48,7 +48,7 @@
 
 static const struct proto_ops econet_ops;
 static struct hlist_head econet_sklist;
-static DEFINE_RWLOCK(econet_lock);
+static DEFINE_SPINLOCK(econet_lock);
 static DEFINE_MUTEX(econet_mutex);
 
 /* Since there are only 256 possible network numbers (or fewer, depends
@@ -98,16 +98,16 @@ struct ec_cb
 
 static void econet_remove_socket(struct hlist_head *list, struct sock *sk)
 {
-	write_lock_bh(&econet_lock);
+	spin_lock_bh(&econet_lock);
 	sk_del_node_init(sk);
-	write_unlock_bh(&econet_lock);
+	spin_unlock_bh(&econet_lock);
 }
 
 static void econet_insert_socket(struct hlist_head *list, struct sock *sk)
 {
-	write_lock_bh(&econet_lock);
+	spin_lock_bh(&econet_lock);
 	sk_add_node(sk, list);
-	write_unlock_bh(&econet_lock);
+	spin_unlock_bh(&econet_lock);
 }
 
 /*
@@ -782,15 +782,19 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char
 	struct sock *sk;
 	struct hlist_node *node;
 
+	spin_lock(&econet_lock);
 	sk_for_each(sk, node, &econet_sklist) {
 		struct econet_sock *opt = ec_sk(sk);
 		if ((opt->port == port || opt->port == 0) &&
 		    (opt->station == station || opt->station == 0) &&
-		    (opt->net == net || opt->net == 0))
+		    (opt->net == net || opt->net == 0)) {
+			sock_hold(sk);
 			goto found;
+		}
 	}
 	sk = NULL;
 found:
+	spin_unlock(&econet_lock);
 	return sk;
 }
 
@@ -852,7 +856,7 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 {
 	struct iphdr *ip = ip_hdr(skb);
 	unsigned char stn = ntohl(ip->saddr) & 0xff;
-	struct sock *sk;
+	struct sock *sk = NULL;
 	struct sk_buff *newskb;
 	struct ec_device *edev = skb->dev->ec_ptr;
 
@@ -882,10 +886,13 @@ static void aun_incoming(struct sk_buff *skb, struct aunhdr *ah, size_t len)
 	}
 
 	aun_send_response(ip->saddr, ah->handle, 3, 0);
+	sock_put(sk);
 	return;
 
 bad:
 	aun_send_response(ip->saddr, ah->handle, 4, 0);
+	if (sk)
+		sock_put(sk);
 }
 
 /*
@@ -1050,7 +1057,7 @@ release:
 static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
 {
 	struct ec_framehdr *hdr;
-	struct sock *sk;
+	struct sock *sk = NULL;
 	struct ec_device *edev = dev->ec_ptr;
 
 	if (!net_eq(dev_net(dev), &init_net))
@@ -1085,10 +1092,12 @@ static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	if (ec_queue_packet(sk, skb, edev->net, hdr->src_stn, hdr->cb,
 			    hdr->port))
 		goto drop;
-
+	sock_put(sk);
 	return NET_RX_SUCCESS;
 
 drop:
+	if (sk)
+		sock_put(sk);
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
-- 
cgit v1.2.3-58-ga151


From 43d28b6515a6ea580a198df3e253e88236f08978 Mon Sep 17 00:00:00 2001
From: Daniel Turull <daniel.turull@gmail.com>
Date: Wed, 9 Jun 2010 22:49:57 +0000
Subject: pktgen: increasing transmission granularity

This patch increases the granularity of the rate generated by pktgen.
The previous version of pktgen uses micro seconds (udelay) resolution when it
was delayed causing gaps in the rates. It is changed to nanosecond (ndelay).
Now any rate is possible.

Also it allows to set, the desired rate in Mb/s or packets per second.

The documentation has been updated.

Signed-off-by: Daniel Turull <daniel.turull@gmail.com>
Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/pktgen.txt |  5 +++++
 net/core/pktgen.c                   | 42 +++++++++++++++++++++++++++++++++----
 2 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 61bb645d50e0..75e4fd708ccb 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -151,6 +151,8 @@ Examples:
 
  pgset stop    	          aborts injection. Also, ^C aborts generator.
 
+ pgset "rate 300M"        set rate to 300 Mb/s
+ pgset "ratep 1000000"    set rate to 1Mpps
 
 Example scripts
 ===============
@@ -241,6 +243,9 @@ src6
 flows
 flowlen
 
+rate
+ratep
+
 References:
 ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
 ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1dacd7ba8dbb..6428653e9498 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -169,7 +169,7 @@
 #include <asm/dma.h>
 #include <asm/div64.h>		/* do_div */
 
-#define VERSION 	"2.73"
+#define VERSION	"2.74"
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
 #define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -980,6 +980,40 @@ static ssize_t pktgen_if_write(struct file *file,
 			(unsigned long long) pkt_dev->delay);
 		return count;
 	}
+	if (!strcmp(name, "rate")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (!value)
+			return len;
+		pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
+		if (debug)
+			printk(KERN_INFO
+				 "pktgen: Delay set at: %llu ns\n",
+					pkt_dev->delay);
+
+		sprintf(pg_result, "OK: rate=%lu", value);
+		return count;
+	}
+	if (!strcmp(name, "ratep")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+		if (!value)
+			return len;
+		pkt_dev->delay = NSEC_PER_SEC/value;
+		if (debug)
+			printk(KERN_INFO
+				 "pktgen: Delay set at: %llu ns\n",
+					pkt_dev->delay);
+
+		sprintf(pg_result, "OK: rate=%lu", value);
+		return count;
+	}
 	if (!strcmp(name, "udp_src_min")) {
 		len = num_arg(&user_buffer[i], 10, &value);
 		if (len < 0)
@@ -2142,15 +2176,15 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_set_expires(&t.timer, spin_until);
 
-	remaining = ktime_to_us(hrtimer_expires_remaining(&t.timer));
+	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
 	if (remaining <= 0) {
 		pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay);
 		return;
 	}
 
 	start_time = ktime_now();
-	if (remaining < 100)
-		udelay(remaining); 	/* really small just spin */
+	if (remaining < 100000)
+		ndelay(remaining);	/* really small just spin */
 	else {
 		/* see do_nanosleep */
 		hrtimer_init_sleeper(&t, current);
-- 
cgit v1.2.3-58-ga151


From be1f3c2c027cc5ad735df6a45a542ed1db7ec48b Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 8 Jun 2010 07:19:54 +0000
Subject: net: Enable 64-bit net device statistics on 32-bit architectures

Use struct rtnl_link_stats64 as the statistics structure.

On 32-bit architectures, insert 32 bits of padding after/before each
field of struct net_device_stats to make its layout compatible with
struct rtnl_link_stats64.  Add an anonymous union in net_device; move
stats into the union and add struct rtnl_link_stats64 stats64.

Add net_device_ops::ndo_get_stats64, implementations of which will
return a pointer to struct rtnl_link_stats64.  Drivers that implement
this operation must not update the structure asynchronously.

Change dev_get_stats() to call ndo_get_stats64 if available, and to
return a pointer to struct rtnl_link_stats64.  Change callers of
dev_get_stats() accordingly.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 13 +++---
 include/linux/if_link.h         |  3 +-
 include/linux/netdevice.h       | 91 ++++++++++++++++++++++++-----------------
 net/8021q/vlanproc.c            | 13 +++---
 net/core/dev.c                  | 19 +++++----
 net/core/net-sysfs.c            | 12 +++---
 net/core/rtnetlink.c            |  6 +--
 7 files changed, 90 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ac4f94b7da37..a95a41b74b4e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3804,20 +3804,21 @@ static int bond_close(struct net_device *bond_dev)
 	return 0;
 }
 
-static struct net_device_stats *bond_get_stats(struct net_device *bond_dev)
+static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct net_device_stats *stats = &bond_dev->stats;
-	struct net_device_stats local_stats;
+	struct rtnl_link_stats64 *stats = &bond_dev->stats64;
+	struct rtnl_link_stats64 local_stats;
 	struct slave *slave;
 	int i;
 
-	memset(&local_stats, 0, sizeof(struct net_device_stats));
+	memset(&local_stats, 0, sizeof(local_stats));
 
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
-		const struct net_device_stats *sstats = dev_get_stats(slave->dev);
+		const struct rtnl_link_stats64 *sstats =
+			dev_get_stats(slave->dev);
 
 		local_stats.rx_packets += sstats->rx_packets;
 		local_stats.rx_bytes += sstats->rx_bytes;
@@ -4569,7 +4570,7 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_stop		= bond_close,
 	.ndo_start_xmit		= bond_start_xmit,
 	.ndo_select_queue	= bond_select_queue,
-	.ndo_get_stats		= bond_get_stats,
+	.ndo_get_stats64	= bond_get_stats,
 	.ndo_do_ioctl		= bond_do_ioctl,
 	.ndo_set_multicast_list	= bond_set_multicast_list,
 	.ndo_change_mtu		= bond_change_mtu,
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 85c812db5a3f..7fcad2e1be3d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -4,7 +4,7 @@
 #include <linux/types.h>
 #include <linux/netlink.h>
 
-/* The struct should be in sync with struct net_device_stats */
+/* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
 	__u32	rx_packets;		/* total packets received	*/
 	__u32	tx_packets;		/* total packets transmitted	*/
@@ -37,6 +37,7 @@ struct rtnl_link_stats {
 	__u32	tx_compressed;
 };
 
+/* The main device statistics structure */
 struct rtnl_link_stats64 {
 	__u64	rx_packets;		/* total packets received	*/
 	__u64	tx_packets;		/* total packets transmitted	*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c319f28d699d..4fbccc5f609a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -159,45 +159,49 @@ static inline bool dev_xmit_complete(int rc)
 #define MAX_HEADER (LL_MAX_HEADER + 48)
 #endif
 
-#endif  /*  __KERNEL__  */
-
 /*
- *	Network device statistics. Akin to the 2.0 ether stats but
- *	with byte counters.
+ *	Old network device statistics. Fields are native words
+ *	(unsigned long) so they can be read and written atomically.
+ *	Each field is padded to 64 bits for compatibility with
+ *	rtnl_link_stats64.
  */
 
+#if BITS_PER_LONG == 64
+#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name
+#elif defined(__LITTLE_ENDIAN)
+#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name, pad_ ## name
+#else
+#define NET_DEVICE_STATS_DEFINE(name)	unsigned long pad_ ## name, name
+#endif
+
 struct net_device_stats {
-	unsigned long	rx_packets;		/* total packets received	*/
-	unsigned long	tx_packets;		/* total packets transmitted	*/
-	unsigned long	rx_bytes;		/* total bytes received 	*/
-	unsigned long	tx_bytes;		/* total bytes transmitted	*/
-	unsigned long	rx_errors;		/* bad packets received		*/
-	unsigned long	tx_errors;		/* packet transmit problems	*/
-	unsigned long	rx_dropped;		/* no space in linux buffers	*/
-	unsigned long	tx_dropped;		/* no space available in linux	*/
-	unsigned long	multicast;		/* multicast packets received	*/
-	unsigned long	collisions;
-
-	/* detailed rx_errors: */
-	unsigned long	rx_length_errors;
-	unsigned long	rx_over_errors;		/* receiver ring buff overflow	*/
-	unsigned long	rx_crc_errors;		/* recved pkt with crc error	*/
-	unsigned long	rx_frame_errors;	/* recv'd frame alignment error */
-	unsigned long	rx_fifo_errors;		/* recv'r fifo overrun		*/
-	unsigned long	rx_missed_errors;	/* receiver missed packet	*/
-
-	/* detailed tx_errors */
-	unsigned long	tx_aborted_errors;
-	unsigned long	tx_carrier_errors;
-	unsigned long	tx_fifo_errors;
-	unsigned long	tx_heartbeat_errors;
-	unsigned long	tx_window_errors;
-	
-	/* for cslip etc */
-	unsigned long	rx_compressed;
-	unsigned long	tx_compressed;
+	NET_DEVICE_STATS_DEFINE(rx_packets);
+	NET_DEVICE_STATS_DEFINE(tx_packets);
+	NET_DEVICE_STATS_DEFINE(rx_bytes);
+	NET_DEVICE_STATS_DEFINE(tx_bytes);
+	NET_DEVICE_STATS_DEFINE(rx_errors);
+	NET_DEVICE_STATS_DEFINE(tx_errors);
+	NET_DEVICE_STATS_DEFINE(rx_dropped);
+	NET_DEVICE_STATS_DEFINE(tx_dropped);
+	NET_DEVICE_STATS_DEFINE(multicast);
+	NET_DEVICE_STATS_DEFINE(collisions);
+	NET_DEVICE_STATS_DEFINE(rx_length_errors);
+	NET_DEVICE_STATS_DEFINE(rx_over_errors);
+	NET_DEVICE_STATS_DEFINE(rx_crc_errors);
+	NET_DEVICE_STATS_DEFINE(rx_frame_errors);
+	NET_DEVICE_STATS_DEFINE(rx_fifo_errors);
+	NET_DEVICE_STATS_DEFINE(rx_missed_errors);
+	NET_DEVICE_STATS_DEFINE(tx_aborted_errors);
+	NET_DEVICE_STATS_DEFINE(tx_carrier_errors);
+	NET_DEVICE_STATS_DEFINE(tx_fifo_errors);
+	NET_DEVICE_STATS_DEFINE(tx_heartbeat_errors);
+	NET_DEVICE_STATS_DEFINE(tx_window_errors);
+	NET_DEVICE_STATS_DEFINE(rx_compressed);
+	NET_DEVICE_STATS_DEFINE(tx_compressed);
 };
 
+#endif  /*  __KERNEL__  */
+
 
 /* Media selection options. */
 enum {
@@ -662,10 +666,19 @@ struct netdev_rx_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
+ * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
- *	statistics. If not defined, the counters in dev->stats will
- *	be used.
+ *	statistics. Drivers must do one of the following:
+ *	1. Define @ndo_get_stats64 to update a rtnl_link_stats64 structure
+ *	   (which should normally be dev->stats64) and return a ponter to
+ *	   it. The structure must not be changed asynchronously.
+ *	2. Define @ndo_get_stats to update a net_device_stats64 structure
+ *	   (which should normally be dev->stats) and return a pointer to
+ *	   it. The structure may be changed asynchronously only if each
+ *	   field is written atomically.
+ *	3. Update dev->stats asynchronously and atomically, and define
+ *	   neither operation.
  *
  * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp);
  *	If device support VLAN receive accleration
@@ -720,6 +733,7 @@ struct net_device_ops {
 						   struct neigh_parms *);
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
+	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
 	void			(*ndo_vlan_rx_register)(struct net_device *dev,
@@ -869,7 +883,10 @@ struct net_device {
 	int			ifindex;
 	int			iflink;
 
-	struct net_device_stats	stats;
+	union {
+		struct rtnl_link_stats64 stats64;
+		struct net_device_stats stats;
+	};
 
 #ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
@@ -2121,7 +2138,7 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
-extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
+extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev);
 extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index afead353e215..df56f5ce887c 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -278,8 +278,9 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
 	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
-	const struct net_device_stats *stats;
+	const struct rtnl_link_stats64 *stats;
 	static const char fmt[] = "%30s %12lu\n";
+	static const char fmt64[] = "%30s %12llu\n";
 	int i;
 
 	if (!is_vlan_dev(vlandev))
@@ -291,12 +292,12 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 		   vlandev->name, dev_info->vlan_id,
 		   (int)(dev_info->flags & 1), vlandev->priv_flags);
 
-	seq_printf(seq, fmt, "total frames received", stats->rx_packets);
-	seq_printf(seq, fmt, "total bytes received", stats->rx_bytes);
-	seq_printf(seq, fmt, "Broadcast/Multicast Rcvd", stats->multicast);
+	seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
+	seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
+	seq_printf(seq, fmt64, "Broadcast/Multicast Rcvd", stats->multicast);
 	seq_puts(seq, "\n");
-	seq_printf(seq, fmt, "total frames transmitted", stats->tx_packets);
-	seq_printf(seq, fmt, "total bytes transmitted", stats->tx_bytes);
+	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
+	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
 	seq_printf(seq, fmt, "total headroom inc",
 		   dev_info->cnt_inc_headroom_on_tx);
 	seq_printf(seq, fmt, "total encap on xmit",
diff --git a/net/core/dev.c b/net/core/dev.c
index 277844901ce3..a1abc10db08a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3701,10 +3701,10 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	const struct net_device_stats *stats = dev_get_stats(dev);
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
 
-	seq_printf(seq, "%6s: %7lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
-		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
+	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
 		   dev->name, stats->rx_bytes, stats->rx_packets,
 		   stats->rx_errors,
 		   stats->rx_dropped + stats->rx_missed_errors,
@@ -5281,18 +5281,21 @@ EXPORT_SYMBOL(dev_txq_stats_fold);
  *	@dev: device to get statistics from
  *
  *	Get network statistics from device. The device driver may provide
- *	its own method by setting dev->netdev_ops->get_stats; otherwise
- *	the internal statistics structure is used.
+ *	its own method by setting dev->netdev_ops->get_stats64 or
+ *	dev->netdev_ops->get_stats; otherwise the internal statistics
+ *	structure is used.
  */
-const struct net_device_stats *dev_get_stats(struct net_device *dev)
+const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
+	if (ops->ndo_get_stats64)
+		return ops->ndo_get_stats64(dev);
 	if (ops->ndo_get_stats)
-		return ops->ndo_get_stats(dev);
+		return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev);
 
 	dev_txq_stats_fold(dev, &dev->stats);
-	return &dev->stats;
+	return &dev->stats64;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 99e7052d7323..ea3bb4c3b87d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -29,6 +29,7 @@ static const char fmt_hex[] = "%#x\n";
 static const char fmt_long_hex[] = "%#lx\n";
 static const char fmt_dec[] = "%d\n";
 static const char fmt_ulong[] = "%lu\n";
+static const char fmt_u64[] = "%llu\n";
 
 static inline int dev_isalive(const struct net_device *dev)
 {
@@ -324,14 +325,13 @@ static ssize_t netstat_show(const struct device *d,
 	struct net_device *dev = to_net_dev(d);
 	ssize_t ret = -EINVAL;
 
-	WARN_ON(offset > sizeof(struct net_device_stats) ||
-			offset % sizeof(unsigned long) != 0);
+	WARN_ON(offset > sizeof(struct rtnl_link_stats64) ||
+			offset % sizeof(u64) != 0);
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		const struct net_device_stats *stats = dev_get_stats(dev);
-		ret = sprintf(buf, fmt_ulong,
-			      *(unsigned long *)(((u8 *) stats) + offset));
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
+		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
 	return ret;
@@ -343,7 +343,7 @@ static ssize_t show_##name(struct device *d,				\
 			   struct device_attribute *attr, char *buf) 	\
 {									\
 	return netstat_show(d, attr, buf,				\
-			    offsetof(struct net_device_stats, name));	\
+			    offsetof(struct rtnl_link_stats64, name));	\
 }									\
 static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1a2af24e9e3d..e645778e9b7e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -579,7 +579,7 @@ static unsigned int rtnl_dev_combine_flags(const struct net_device *dev,
 }
 
 static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
-				 const struct net_device_stats *b)
+				 const struct rtnl_link_stats64 *b)
 {
 	a->rx_packets = b->rx_packets;
 	a->tx_packets = b->tx_packets;
@@ -610,7 +610,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 }
 
-static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
+static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
 {
 	struct rtnl_link_stats64 a;
 
@@ -791,7 +791,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
-	const struct net_device_stats *stats;
+	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr;
 
 	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
-- 
cgit v1.2.3-58-ga151


From 0b5c25e8ac3a60bd01a52ca7405ba96aec8c16be Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 8 Jun 2010 08:23:01 +0000
Subject: irttp: Print device parameters and statistics as unsigned

Device statistics have type unsigned long and several of the
device-specific parameters printed here have type __u32.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irttp.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index 47db1d8a0d92..285761e77d90 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1853,23 +1853,23 @@ static int irttp_seq_show(struct seq_file *seq, void *v)
 		   self->remote_credit);
 	seq_printf(seq, "send credit: %d\n",
 		   self->send_credit);
-	seq_printf(seq, "  tx packets: %ld, ",
+	seq_printf(seq, "  tx packets: %lu, ",
 		   self->stats.tx_packets);
-	seq_printf(seq, "rx packets: %ld, ",
+	seq_printf(seq, "rx packets: %lu, ",
 		   self->stats.rx_packets);
-	seq_printf(seq, "tx_queue len: %d ",
+	seq_printf(seq, "tx_queue len: %u ",
 		   skb_queue_len(&self->tx_queue));
-	seq_printf(seq, "rx_queue len: %d\n",
+	seq_printf(seq, "rx_queue len: %u\n",
 		   skb_queue_len(&self->rx_queue));
 	seq_printf(seq, "  tx_sdu_busy: %s, ",
 		   self->tx_sdu_busy? "TRUE":"FALSE");
 	seq_printf(seq, "rx_sdu_busy: %s\n",
 		   self->rx_sdu_busy? "TRUE":"FALSE");
-	seq_printf(seq, "  max_seg_size: %d, ",
+	seq_printf(seq, "  max_seg_size: %u, ",
 		   self->max_seg_size);
-	seq_printf(seq, "tx_max_sdu_size: %d, ",
+	seq_printf(seq, "tx_max_sdu_size: %u, ",
 		   self->tx_max_sdu_size);
-	seq_printf(seq, "rx_max_sdu_size: %d\n",
+	seq_printf(seq, "rx_max_sdu_size: %u\n",
 		   self->rx_max_sdu_size);
 
 	seq_printf(seq, "  Used by (%s)\n\n",
-- 
cgit v1.2.3-58-ga151


From e897082fe7a5b591dc4dd5599ac39081a7c8e482 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Sun, 13 Jun 2010 10:36:30 +0000
Subject: net: fix deliver_no_wcard regression on loopback device

deliver_no_wcard is not being set in skb_copy_header.
In the skb_cloned case it is not being cleared and
may cause the skb to be dropped when the loopback device
pushes it back up the stack.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9f07e749d7b1..bcf2fa3e0ddc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -532,6 +532,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
 	new->priority		= old->priority;
+	new->deliver_no_wcard	= old->deliver_no_wcard;
 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
 	new->ipvs_property	= old->ipvs_property;
 #endif
-- 
cgit v1.2.3-58-ga151


From e8d15e6460cb0eea00f2574a80d94496943403ba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 13 Jun 2010 10:50:46 +0000
Subject: net: rxhash already set in __copy_skb_header

No need to copy rxhash again in __skb_clone()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bcf2fa3e0ddc..34432b4e96bb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -570,7 +570,6 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(len);
 	C(data_len);
 	C(mac_len);
-	C(rxhash);
 	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->cloned = 1;
 	n->nohdr = 0;
-- 
cgit v1.2.3-58-ga151


From f5c5440d40a24c5dc8030cde0a03debe87de4afb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Jun 2010 16:15:23 +0200
Subject: netfilter: nfnetlink_log: RCU conversion, part 2

- must use atomic_inc_not_zero() in instance_lookup_get()

- must use hlist_add_head_rcu() instead of hlist_add_head()

- must use hlist_del_rcu() instead of hlist_del()

- Introduce NFULNL_COPY_DISABLED to stop lockless reader from using an
instance, before we do final instance_put() on it.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nfnetlink_log.h |  1 +
 net/netfilter/nfnetlink_log.c           | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h
index d3bab7a2c9b7..1d0b84aa1d42 100644
--- a/include/linux/netfilter/nfnetlink_log.h
+++ b/include/linux/netfilter/nfnetlink_log.h
@@ -89,6 +89,7 @@ enum nfulnl_attr_config {
 #define NFULNL_COPY_NONE	0x00
 #define NFULNL_COPY_META	0x01
 #define NFULNL_COPY_PACKET	0x02
+#define NFULNL_COPY_DISABLED	0x03
 
 #define NFULNL_CFG_F_SEQ	0x0001
 #define NFULNL_CFG_F_SEQ_GLOBAL	0x0002
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 8ec23ec568e7..fb86a51bb65a 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -109,8 +109,8 @@ instance_lookup_get(u_int16_t group_num)
 
 	rcu_read_lock_bh();
 	inst = __instance_lookup(group_num);
-	if (inst)
-		instance_get(inst);
+	if (inst && !atomic_inc_not_zero(&inst->use))
+		inst = NULL;
 	rcu_read_unlock_bh();
 
 	return inst;
@@ -171,7 +171,7 @@ instance_create(u_int16_t group_num, int pid)
 	inst->copy_mode 	= NFULNL_COPY_PACKET;
 	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;
 
-	hlist_add_head(&inst->hlist,
+	hlist_add_head_rcu(&inst->hlist,
 		       &instance_table[instance_hashfn(group_num)]);
 
 	spin_unlock_bh(&instances_lock);
@@ -185,18 +185,23 @@ out_unlock:
 
 static void __nfulnl_flush(struct nfulnl_instance *inst);
 
+/* called with BH disabled */
 static void
 __instance_destroy(struct nfulnl_instance *inst)
 {
 	/* first pull it out of the global list */
-	hlist_del(&inst->hlist);
+	hlist_del_rcu(&inst->hlist);
 
 	/* then flush all pending packets from skb */
 
-	spin_lock_bh(&inst->lock);
+	spin_lock(&inst->lock);
+
+	/* lockless readers wont be able to use us */
+	inst->copy_mode = NFULNL_COPY_DISABLED;
+
 	if (inst->skb)
 		__nfulnl_flush(inst);
-	spin_unlock_bh(&inst->lock);
+	spin_unlock(&inst->lock);
 
 	/* and finally put the refcount */
 	instance_put(inst);
@@ -624,6 +629,7 @@ nfulnl_log_packet(u_int8_t pf,
 		size += nla_total_size(data_len);
 		break;
 
+	case NFULNL_COPY_DISABLED:
 	default:
 		goto unlock_and_release;
 	}
-- 
cgit v1.2.3-58-ga151


From c86ee67c7c4bbab2818f653eb00a70671821624a Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Mon, 14 Jun 2010 16:20:02 +0200
Subject: netfilter: kill redundant check code in which setting ip_summed value

If the returned csum value is 0, We has set ip_summed with
CHECKSUM_UNNECESSARY flag in __skb_checksum_complete_head().

So this patch kills the check and changes to return to upper
caller directly.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter.c | 4 +---
 net/ipv6/netfilter.c | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 07de855e2175..acd1ea87ba51 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -212,9 +212,7 @@ static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
 		skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol,
 					       skb->len - dataoff, 0);
 		skb->ip_summed = CHECKSUM_NONE;
-		csum = __skb_checksum_complete_head(skb, dataoff + len);
-		if (!csum)
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		return __skb_checksum_complete_head(skb, dataoff + len);
 	}
 	return csum;
 }
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index a74951c039b6..7155b2451d7c 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -151,9 +151,7 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
 							 protocol,
 							 csum_sub(0, hsum)));
 		skb->ip_summed = CHECKSUM_NONE;
-		csum = __skb_checksum_complete_head(skb, dataoff + len);
-		if (!csum)
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		return __skb_checksum_complete_head(skb, dataoff + len);
 	}
 	return csum;
 };
-- 
cgit v1.2.3-58-ga151


From 841a5940eb872d70dad2b9ee7f946d8fd13a8c22 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Mon, 14 Jun 2010 16:28:23 +0200
Subject: netfilter: defrag: remove one redundant atomic ops

Instead of doing one atomic operation per frag, we can factorize them.
Reported from Eric Dumazet.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6fb890187de0..bc5b86d477c1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -442,7 +442,6 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &nf_init_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -452,8 +451,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &nf_init_frags.mem);
 	}
+	atomic_sub(head->truesize, &nf_init_frags.mem);
 
 	head->next = NULL;
 	head->dev = dev;
-- 
cgit v1.2.3-58-ga151


From 0b041f8d1e6fb11a6134d37230da8c2182f99110 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Mon, 14 Jun 2010 16:30:47 +0200
Subject: netfilter: defrag: kill unused work parameter of frag_kfree_skb()

The parameter (work) is unused, remove it.
Reported from Eric Dumazet.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/nf_conntrack_reasm.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index bc5b86d477c1..9254008602d4 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -114,10 +114,8 @@ static void nf_skb_free(struct sk_buff *skb)
 }
 
 /* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
+static void frag_kfree_skb(struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
 	atomic_sub(skb->truesize, &nf_init_frags.mem);
 	nf_skb_free(skb);
 	kfree_skb(skb);
@@ -335,7 +333,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 				fq->q.fragments = next;
 
 			fq->q.meat -= free_it->len;
-			frag_kfree_skb(free_it, NULL);
+			frag_kfree_skb(free_it);
 		}
 	}
 
-- 
cgit v1.2.3-58-ga151


From f6bc7d9e4760324258ad5f5d147e79db8442842e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Jun 2010 04:39:27 +0000
Subject: ipv6: avoid two atomics in ipv6_rthdr_rcv()

Use __in6_dev_get() instead of in6_dev_get()/in6_dev_put()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/exthdrs.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 853a633a94d4..262f105d23b9 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -312,6 +312,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
   Routing header.
  ********************************/
 
+/* called with rcu_read_lock() */
 static int ipv6_rthdr_rcv(struct sk_buff *skb)
 {
 	struct inet6_skb_parm *opt = IP6CB(skb);
@@ -324,12 +325,9 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
 	struct net *net = dev_net(skb->dev);
 	int accept_source_route = net->ipv6.devconf_all->accept_source_route;
 
-	idev = in6_dev_get(skb->dev);
-	if (idev) {
-		if (accept_source_route > idev->cnf.accept_source_route)
-			accept_source_route = idev->cnf.accept_source_route;
-		in6_dev_put(idev);
-	}
+	idev = __in6_dev_get(skb->dev);
+	if (idev && accept_source_route > idev->cnf.accept_source_route)
+		accept_source_route = idev->cnf.accept_source_route;
 
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) ||
 	    !pskb_may_pull(skb, (skb_transport_offset(skb) +
-- 
cgit v1.2.3-58-ga151


From c68f24cc354050415c5ea543cd19ea5424463a2f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Jun 2010 04:46:20 +0000
Subject: ipv6: RCU changes in ipv6_get_mtu() and ip6_dst_hoplimit()

Use RCU to avoid atomic ops on idev refcnt in ipv6_get_mtu()
and ip6_dst_hoplimit()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f7702850d45c..8f2d0400cf8a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1084,11 +1084,11 @@ static int ipv6_get_mtu(struct net_device *dev)
 	int mtu = IPV6_MIN_MTU;
 	struct inet6_dev *idev;
 
-	idev = in6_dev_get(dev);
-	if (idev) {
+	rcu_read_lock();
+	idev = __in6_dev_get(dev);
+	if (idev)
 		mtu = idev->cnf.mtu6;
-		in6_dev_put(idev);
-	}
+	rcu_read_unlock();
 	return mtu;
 }
 
@@ -1097,12 +1097,15 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
 	int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
 	if (hoplimit < 0) {
 		struct net_device *dev = dst->dev;
-		struct inet6_dev *idev = in6_dev_get(dev);
-		if (idev) {
+		struct inet6_dev *idev;
+
+		rcu_read_lock();
+		idev = __in6_dev_get(dev);
+		if (idev)
 			hoplimit = idev->cnf.hop_limit;
-			in6_dev_put(idev);
-		} else
+		else
 			hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
+		rcu_read_unlock();
 	}
 	return hoplimit;
 }
-- 
cgit v1.2.3-58-ga151


From d6cc1d642de9284cb26488ea390d915b50ee2504 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Jun 2010 19:35:21 +0000
Subject: inetpeer: various changes

Try to reduce cache line contentions in peer management, to reduce IP
defragmentation overhead.

- peer_fake_node is marked 'const' to make sure its not modified.
  (tested with CONFIG_DEBUG_RODATA=y)

- Group variables in two structures to reduce number of dirtied cache
lines. One named "peers" for avl tree root, its number of entries, and
associated lock. (candidate for RCU conversion)

- A second one named "unused_peers" for unused list and its lock

- Add a !list_empty() test in unlink_from_unused() to avoid taking lock
when entry is not unused.

- Use atomic_dec_and_lock() in inet_putpeer() to avoid taking lock in
some cases.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 94 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 38 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 6bcfe52a9c87..035673fd42d4 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -70,17 +70,25 @@
 static struct kmem_cache *peer_cachep __read_mostly;
 
 #define node_height(x) x->avl_height
-static struct inet_peer peer_fake_node = {
-	.avl_left	= &peer_fake_node,
-	.avl_right	= &peer_fake_node,
+
+#define peer_avl_empty ((struct inet_peer *)&peer_fake_node)
+static const struct inet_peer peer_fake_node = {
+	.avl_left	= peer_avl_empty,
+	.avl_right	= peer_avl_empty,
 	.avl_height	= 0
 };
-#define peer_avl_empty (&peer_fake_node)
-static struct inet_peer *peer_root = peer_avl_empty;
-static DEFINE_RWLOCK(peer_pool_lock);
+
+static struct {
+	struct inet_peer *root;
+	rwlock_t	lock;
+	int		total;
+} peers = {
+	.root		= peer_avl_empty,
+	.lock		= __RW_LOCK_UNLOCKED(peers.lock),
+	.total		= 0,
+};
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
 
-static int peer_total;
 /* Exported for sysctl_net_ipv4.  */
 int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
 					 * aggressively at this stage */
@@ -89,8 +97,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min
 int inet_peer_gc_mintime __read_mostly = 10 * HZ;
 int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
 
-static LIST_HEAD(unused_peers);
-static DEFINE_SPINLOCK(inet_peer_unused_lock);
+static struct {
+	struct list_head	list;
+	spinlock_t		lock;
+} unused_peers = {
+	.list			= LIST_HEAD_INIT(unused_peers.list),
+	.lock			= __SPIN_LOCK_UNLOCKED(unused_peers.lock),
+};
 
 static void peer_check_expire(unsigned long dummy);
 static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
@@ -131,9 +144,11 @@ void __init inet_initpeers(void)
 /* Called with or without local BH being disabled. */
 static void unlink_from_unused(struct inet_peer *p)
 {
-	spin_lock_bh(&inet_peer_unused_lock);
-	list_del_init(&p->unused);
-	spin_unlock_bh(&inet_peer_unused_lock);
+	if (!list_empty(&p->unused)) {
+		spin_lock_bh(&unused_peers.lock);
+		list_del_init(&p->unused);
+		spin_unlock_bh(&unused_peers.lock);
+	}
 }
 
 /*
@@ -146,9 +161,9 @@ static void unlink_from_unused(struct inet_peer *p)
 	struct inet_peer *u, **v;				\
 	if (_stack != NULL) {					\
 		stackptr = _stack;				\
-		*stackptr++ = &peer_root;			\
+		*stackptr++ = &peers.root;			\
 	}							\
-	for (u = peer_root; u != peer_avl_empty; ) {		\
+	for (u = peers.root; u != peer_avl_empty; ) {		\
 		if (_daddr == u->v4daddr)			\
 			break;					\
 		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
@@ -262,7 +277,7 @@ do {								\
 	n->avl_right = peer_avl_empty;				\
 	**--stackptr = n;					\
 	peer_avl_rebalance(stack, stackptr);			\
-} while(0)
+} while (0)
 
 /* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p)
@@ -271,7 +286,7 @@ static void unlink_from_pool(struct inet_peer *p)
 
 	do_free = 0;
 
-	write_lock_bh(&peer_pool_lock);
+	write_lock_bh(&peers.lock);
 	/* Check the reference counter.  It was artificially incremented by 1
 	 * in cleanup() function to prevent sudden disappearing.  If the
 	 * reference count is still 1 then the node is referenced only as `p'
@@ -303,10 +318,10 @@ static void unlink_from_pool(struct inet_peer *p)
 			delp[1] = &t->avl_left; /* was &p->avl_left */
 		}
 		peer_avl_rebalance(stack, stackptr);
-		peer_total--;
+		peers.total--;
 		do_free = 1;
 	}
-	write_unlock_bh(&peer_pool_lock);
+	write_unlock_bh(&peers.lock);
 
 	if (do_free)
 		kmem_cache_free(peer_cachep, p);
@@ -326,16 +341,16 @@ static int cleanup_once(unsigned long ttl)
 	struct inet_peer *p = NULL;
 
 	/* Remove the first entry from the list of unused nodes. */
-	spin_lock_bh(&inet_peer_unused_lock);
-	if (!list_empty(&unused_peers)) {
+	spin_lock_bh(&unused_peers.lock);
+	if (!list_empty(&unused_peers.list)) {
 		__u32 delta;
 
-		p = list_first_entry(&unused_peers, struct inet_peer, unused);
+		p = list_first_entry(&unused_peers.list, struct inet_peer, unused);
 		delta = (__u32)jiffies - p->dtime;
 
 		if (delta < ttl) {
 			/* Do not prune fresh entries. */
-			spin_unlock_bh(&inet_peer_unused_lock);
+			spin_unlock_bh(&unused_peers.lock);
 			return -1;
 		}
 
@@ -345,7 +360,7 @@ static int cleanup_once(unsigned long ttl)
 		 * before unlink_from_pool() call. */
 		atomic_inc(&p->refcnt);
 	}
-	spin_unlock_bh(&inet_peer_unused_lock);
+	spin_unlock_bh(&unused_peers.lock);
 
 	if (p == NULL)
 		/* It means that the total number of USED entries has
@@ -364,11 +379,11 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
 
 	/* Look up for the address quickly. */
-	read_lock_bh(&peer_pool_lock);
+	read_lock_bh(&peers.lock);
 	p = lookup(daddr, NULL);
 	if (p != peer_avl_empty)
 		atomic_inc(&p->refcnt);
-	read_unlock_bh(&peer_pool_lock);
+	read_unlock_bh(&peers.lock);
 
 	if (p != peer_avl_empty) {
 		/* The existing node has been found. */
@@ -390,7 +405,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	atomic_set(&n->ip_id_count, secure_ip_id(daddr));
 	n->tcp_ts_stamp = 0;
 
-	write_lock_bh(&peer_pool_lock);
+	write_lock_bh(&peers.lock);
 	/* Check if an entry has suddenly appeared. */
 	p = lookup(daddr, stack);
 	if (p != peer_avl_empty)
@@ -399,10 +414,10 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 	/* Link the node. */
 	link_to_pool(n);
 	INIT_LIST_HEAD(&n->unused);
-	peer_total++;
-	write_unlock_bh(&peer_pool_lock);
+	peers.total++;
+	write_unlock_bh(&peers.lock);
 
-	if (peer_total >= inet_peer_threshold)
+	if (peers.total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
@@ -411,7 +426,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create)
 out_free:
 	/* The appropriate node is already in the pool. */
 	atomic_inc(&p->refcnt);
-	write_unlock_bh(&peer_pool_lock);
+	write_unlock_bh(&peers.lock);
 	/* Remove the entry from unused list if it was there. */
 	unlink_from_unused(p);
 	/* Free preallocated the preallocated node. */
@@ -425,12 +440,12 @@ static void peer_check_expire(unsigned long dummy)
 	unsigned long now = jiffies;
 	int ttl;
 
-	if (peer_total >= inet_peer_threshold)
+	if (peers.total >= inet_peer_threshold)
 		ttl = inet_peer_minttl;
 	else
 		ttl = inet_peer_maxttl
 				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
-					peer_total / inet_peer_threshold * HZ;
+					peers.total / inet_peer_threshold * HZ;
 	while (!cleanup_once(ttl)) {
 		if (jiffies != now)
 			break;
@@ -439,22 +454,25 @@ static void peer_check_expire(unsigned long dummy)
 	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
 	 * interval depending on the total number of entries (more entries,
 	 * less interval). */
-	if (peer_total >= inet_peer_threshold)
+	if (peers.total >= inet_peer_threshold)
 		peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
 	else
 		peer_periodic_timer.expires = jiffies
 			+ inet_peer_gc_maxtime
 			- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
-				peer_total / inet_peer_threshold * HZ;
+				peers.total / inet_peer_threshold * HZ;
 	add_timer(&peer_periodic_timer);
 }
 
 void inet_putpeer(struct inet_peer *p)
 {
-	spin_lock_bh(&inet_peer_unused_lock);
-	if (atomic_dec_and_test(&p->refcnt)) {
-		list_add_tail(&p->unused, &unused_peers);
+	local_bh_disable();
+
+	if (atomic_dec_and_lock(&p->refcnt, &unused_peers.lock)) {
+		list_add_tail(&p->unused, &unused_peers.list);
 		p->dtime = (__u32)jiffies;
+		spin_unlock(&unused_peers.lock);
 	}
-	spin_unlock_bh(&inet_peer_unused_lock);
+
+	local_bh_enable();
 }
-- 
cgit v1.2.3-58-ga151


From d73f33b168831e53972fbf7c85db87950a41436c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 13:08:51 +0200
Subject: netfilter: CLUSTERIP: RCU conversion

- clusterip_lock becomes a spinlock
- lockless lookups
- kfree() deferred after RCU grace period
- rcu_barrier_bh() inserted in clusterip_tg_exit()

v2)
- As Patrick pointed out, we use atomic_inc_not_zero() in
clusterip_config_find_get().
- list_add_rcu() and list_del_rcu() variants are used.
- atomic_dec_and_lock() used in clusterip_config_entry_put()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 48 +++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index f91c94b9a790..64d0875f5192 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -53,12 +53,13 @@ struct clusterip_config {
 #endif
 	enum clusterip_hashmode hash_mode;	/* which hashing mode */
 	u_int32_t hash_initval;			/* hash initialization */
+	struct rcu_head rcu;
 };
 
 static LIST_HEAD(clusterip_configs);
 
 /* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static DEFINE_SPINLOCK(clusterip_lock);
 
 #ifdef CONFIG_PROC_FS
 static const struct file_operations clusterip_proc_fops;
@@ -71,11 +72,17 @@ clusterip_config_get(struct clusterip_config *c)
 	atomic_inc(&c->refcount);
 }
 
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+	kfree(container_of(head, struct clusterip_config, rcu));
+}
+
 static inline void
 clusterip_config_put(struct clusterip_config *c)
 {
 	if (atomic_dec_and_test(&c->refcount))
-		kfree(c);
+		call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
 }
 
 /* decrease the count of entries using/referencing this config.  If last
@@ -84,10 +91,11 @@ clusterip_config_put(struct clusterip_config *c)
 static inline void
 clusterip_config_entry_put(struct clusterip_config *c)
 {
-	write_lock_bh(&clusterip_lock);
-	if (atomic_dec_and_test(&c->entries)) {
-		list_del(&c->list);
-		write_unlock_bh(&clusterip_lock);
+	local_bh_disable();
+	if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
+		list_del_rcu(&c->list);
+		spin_unlock(&clusterip_lock);
+		local_bh_enable();
 
 		dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
@@ -100,7 +108,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 #endif
 		return;
 	}
-	write_unlock_bh(&clusterip_lock);
+	local_bh_enable();
 }
 
 static struct clusterip_config *
@@ -108,7 +116,7 @@ __clusterip_config_find(__be32 clusterip)
 {
 	struct clusterip_config *c;
 
-	list_for_each_entry(c, &clusterip_configs, list) {
+	list_for_each_entry_rcu(c, &clusterip_configs, list) {
 		if (c->clusterip == clusterip)
 			return c;
 	}
@@ -121,16 +129,15 @@ clusterip_config_find_get(__be32 clusterip, int entry)
 {
 	struct clusterip_config *c;
 
-	read_lock_bh(&clusterip_lock);
+	rcu_read_lock_bh();
 	c = __clusterip_config_find(clusterip);
-	if (!c) {
-		read_unlock_bh(&clusterip_lock);
-		return NULL;
+	if (c) {
+		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+			c = NULL;
+		else if (entry)
+			atomic_inc(&c->entries);
 	}
-	atomic_inc(&c->refcount);
-	if (entry)
-		atomic_inc(&c->entries);
-	read_unlock_bh(&clusterip_lock);
+	rcu_read_unlock_bh();
 
 	return c;
 }
@@ -181,9 +188,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	}
 #endif
 
-	write_lock_bh(&clusterip_lock);
-	list_add(&c->list, &clusterip_configs);
-	write_unlock_bh(&clusterip_lock);
+	spin_lock_bh(&clusterip_lock);
+	list_add_rcu(&c->list, &clusterip_configs);
+	spin_unlock_bh(&clusterip_lock);
 
 	return c;
 }
@@ -733,6 +740,9 @@ static void __exit clusterip_tg_exit(void)
 #endif
 	nf_unregister_hook(&cip_arp_ops);
 	xt_unregister_target(&clusterip_tg_reg);
+
+	/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+	rcu_barrier_bh();
 }
 
 module_init(clusterip_tg_init);
-- 
cgit v1.2.3-58-ga151


From 0902b469bd25065aa0688c3cee6f11744c817e7c Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@nokia.com>
Date: Tue, 15 Jun 2010 15:04:00 +0200
Subject: netfilter: xtables: idletimer target implementation

This patch implements an idletimer Xtables target that can be used to
identify when interfaces have been idle for a certain period of time.

Timers are identified by labels and are created when a rule is set with a new
label.  The rules also take a timeout value (in seconds) as an option.  If
more than one rule uses the same timer label, the timer will be restarted
whenever any of the rules get a hit.

One entry for each timer is created in sysfs.  This attribute contains the
timer remaining for the timer to expire.  The attributes are located under
the xt_idletimer class:

/sys/class/xt_idletimer/timers/<label>

When the timer expires, the target module sends a sysfs notification to the
userspace, which can then decide what to do (eg. disconnect to save power).

Cc: Timo Teras <timo.teras@iki.fi>
Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild         |   1 +
 include/linux/netfilter/xt_IDLETIMER.h |  45 +++++
 net/netfilter/Kconfig                  |  12 ++
 net/netfilter/Makefile                 |   1 +
 net/netfilter/xt_IDLETIMER.c           | 314 +++++++++++++++++++++++++++++++++
 5 files changed, 373 insertions(+)
 create mode 100644 include/linux/netfilter/xt_IDLETIMER.h
 create mode 100644 net/netfilter/xt_IDLETIMER.c

(limited to 'net')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 48767cd16453..bb103f43afa0 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -8,6 +8,7 @@ header-y += xt_CONNMARK.h
 header-y += xt_CONNSECMARK.h
 header-y += xt_CT.h
 header-y += xt_DSCP.h
+header-y += xt_IDLETIMER.h
 header-y += xt_LED.h
 header-y += xt_MARK.h
 header-y += xt_NFLOG.h
diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/linux/netfilter/xt_IDLETIMER.h
new file mode 100644
index 000000000000..3e1aa1be942e
--- /dev/null
+++ b/include/linux/netfilter/xt_IDLETIMER.h
@@ -0,0 +1,45 @@
+/*
+ * linux/include/linux/netfilter/xt_IDLETIMER.h
+ *
+ * Header file for Xtables timer target module.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and forward-ported to 2.6.34
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _XT_IDLETIMER_H
+#define _XT_IDLETIMER_H
+
+#include <linux/types.h>
+
+#define MAX_IDLETIMER_LABEL_SIZE 28
+
+struct idletimer_tg_info {
+	__u32 timeout;
+
+	char label[MAX_IDLETIMER_LABEL_SIZE];
+
+	/* for kernel module internal use only */
+	struct idletimer_tg *timer __attribute((aligned(8)));
+};
+
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8593a77cfea9..413ed24a968a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -424,6 +424,18 @@ config NETFILTER_XT_TARGET_HL
 	since you can easily create immortal packets that loop
 	forever on the network.
 
+config NETFILTER_XT_TARGET_IDLETIMER
+	tristate  "IDLETIMER target support"
+	depends on NETFILTER_ADVANCED
+	help
+
+	  This option adds the `IDLETIMER' target.  Each matching packet
+	  resets the timer associated with label specified when the rule is
+	  added.  When the timer expires, it triggers a sysfs notification.
+	  The remaining time for expiration can be read via sysfs.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_LED
 	tristate '"LED" target support'
 	depends on LEDS_CLASS && LEDS_TRIGGERS
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 14e3a8fd8180..e28420aac5ef 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
 
 # matches
 obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
new file mode 100644
index 000000000000..e11090a0675c
--- /dev/null
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -0,0 +1,314 @@
+/*
+ * linux/net/netfilter/xt_IDLETIMER.c
+ *
+ * Netfilter module to trigger a timer when packet matches.
+ * After timer expires a kevent will be sent.
+ *
+ * Copyright (C) 2004, 2010 Nokia Corporation
+ * Written by Timo Teras <ext-timo.teras@nokia.com>
+ *
+ * Converted to x_tables and reworked for upstream inclusion
+ * by Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * Contact: Luciano Coelho <luciano.coelho@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_IDLETIMER.h>
+#include <linux/kobject.h>
+#include <linux/workqueue.h>
+#include <linux/sysfs.h>
+
+struct idletimer_tg_attr {
+	struct attribute attr;
+	ssize_t	(*show)(struct kobject *kobj,
+			struct attribute *attr, char *buf);
+};
+
+struct idletimer_tg {
+	struct list_head entry;
+	struct timer_list timer;
+	struct work_struct work;
+
+	struct kobject *kobj;
+	struct idletimer_tg_attr attr;
+
+	unsigned int refcnt;
+};
+
+static LIST_HEAD(idletimer_tg_list);
+static DEFINE_MUTEX(list_mutex);
+
+static struct kobject *idletimer_tg_kobj;
+
+static
+struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
+{
+	struct idletimer_tg *entry;
+
+	BUG_ON(!label);
+
+	list_for_each_entry(entry, &idletimer_tg_list, entry) {
+		if (!strcmp(label, entry->attr.attr.name))
+			return entry;
+	}
+
+	return NULL;
+}
+
+static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
+				 char *buf)
+{
+	struct idletimer_tg *timer;
+	unsigned long expires = 0;
+
+	mutex_lock(&list_mutex);
+
+	timer =	__idletimer_tg_find_by_label(attr->name);
+	if (timer)
+		expires = timer->timer.expires;
+
+	mutex_unlock(&list_mutex);
+
+	if (time_after(expires, jiffies))
+		return sprintf(buf, "%u\n",
+			       jiffies_to_msecs(expires - jiffies) / 1000);
+
+	return sprintf(buf, "0\n");
+}
+
+static void idletimer_tg_work(struct work_struct *work)
+{
+	struct idletimer_tg *timer = container_of(work, struct idletimer_tg,
+						  work);
+
+	sysfs_notify(idletimer_tg_kobj, NULL, timer->attr.attr.name);
+}
+
+static void idletimer_tg_expired(unsigned long data)
+{
+	struct idletimer_tg *timer = (struct idletimer_tg *) data;
+
+	pr_debug("timer %s expired\n", timer->attr.attr.name);
+
+	schedule_work(&timer->work);
+}
+
+static int idletimer_tg_create(struct idletimer_tg_info *info)
+{
+	int ret;
+
+	info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+	if (!info->timer) {
+		pr_debug("couldn't alloc timer\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
+	if (!info->timer->attr.attr.name) {
+		pr_debug("couldn't alloc attribute name\n");
+		ret = -ENOMEM;
+		goto out_free_timer;
+	}
+	info->timer->attr.attr.mode = S_IRUGO;
+	info->timer->attr.show = idletimer_tg_show;
+
+	ret = sysfs_create_file(idletimer_tg_kobj, &info->timer->attr.attr);
+	if (ret < 0) {
+		pr_debug("couldn't add file to sysfs");
+		goto out_free_attr;
+	}
+
+	list_add(&info->timer->entry, &idletimer_tg_list);
+
+	setup_timer(&info->timer->timer, idletimer_tg_expired,
+		    (unsigned long) info->timer);
+	info->timer->refcnt = 1;
+
+	mod_timer(&info->timer->timer,
+		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+	INIT_WORK(&info->timer->work, idletimer_tg_work);
+
+	return 0;
+
+out_free_attr:
+	kfree(info->timer->attr.attr.name);
+out_free_timer:
+	kfree(info->timer);
+out:
+	return ret;
+}
+
+/*
+ * The actual xt_tables plugin.
+ */
+static unsigned int idletimer_tg_target(struct sk_buff *skb,
+					 const struct xt_action_param *par)
+{
+	const struct idletimer_tg_info *info = par->targinfo;
+
+	pr_debug("resetting timer %s, timeout period %u\n",
+		 info->label, info->timeout);
+
+	BUG_ON(!info->timer);
+
+	mod_timer(&info->timer->timer,
+		  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+	return XT_CONTINUE;
+}
+
+static int idletimer_tg_checkentry(const struct xt_tgchk_param *par)
+{
+	struct idletimer_tg_info *info = par->targinfo;
+	int ret;
+
+	pr_debug("checkentry targinfo%s\n", info->label);
+
+	if (info->timeout == 0) {
+		pr_debug("timeout value is zero\n");
+		return -EINVAL;
+	}
+
+	if (info->label[0] == '\0' ||
+	    strnlen(info->label,
+		    MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) {
+		pr_debug("label is empty or not nul-terminated\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&list_mutex);
+
+	info->timer = __idletimer_tg_find_by_label(info->label);
+	if (info->timer) {
+		info->timer->refcnt++;
+		mod_timer(&info->timer->timer,
+			  msecs_to_jiffies(info->timeout * 1000) + jiffies);
+
+		pr_debug("increased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	} else {
+		ret = idletimer_tg_create(info);
+		if (ret < 0) {
+			pr_debug("failed to create timer\n");
+			mutex_unlock(&list_mutex);
+			return ret;
+		}
+	}
+
+	mutex_unlock(&list_mutex);
+	return 0;
+}
+
+static void idletimer_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	const struct idletimer_tg_info *info = par->targinfo;
+
+	pr_debug("destroy targinfo %s\n", info->label);
+
+	mutex_lock(&list_mutex);
+
+	if (--info->timer->refcnt == 0) {
+		pr_debug("deleting timer %s\n", info->label);
+
+		list_del(&info->timer->entry);
+		del_timer_sync(&info->timer->timer);
+		sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr);
+		kfree(info->timer->attr.attr.name);
+		kfree(info->timer);
+	} else {
+		pr_debug("decreased refcnt of timer %s to %u\n",
+			 info->label, info->timer->refcnt);
+	}
+
+	mutex_unlock(&list_mutex);
+}
+
+static struct xt_target idletimer_tg __read_mostly = {
+	.name		= "IDLETIMER",
+	.family		= NFPROTO_UNSPEC,
+	.target		= idletimer_tg_target,
+	.targetsize     = sizeof(struct idletimer_tg_info),
+	.checkentry	= idletimer_tg_checkentry,
+	.destroy        = idletimer_tg_destroy,
+	.me		= THIS_MODULE,
+};
+
+static struct class *idletimer_tg_class;
+
+static struct device *idletimer_tg_device;
+
+static int __init idletimer_tg_init(void)
+{
+	int err;
+
+	idletimer_tg_class = class_create(THIS_MODULE, "xt_idletimer");
+	err = PTR_ERR(idletimer_tg_class);
+	if (IS_ERR(idletimer_tg_class)) {
+		pr_debug("couldn't register device class\n");
+		goto out;
+	}
+
+	idletimer_tg_device = device_create(idletimer_tg_class, NULL,
+					    MKDEV(0, 0), NULL, "timers");
+	err = PTR_ERR(idletimer_tg_device);
+	if (IS_ERR(idletimer_tg_device)) {
+		pr_debug("couldn't register system device\n");
+		goto out_class;
+	}
+
+	idletimer_tg_kobj = &idletimer_tg_device->kobj;
+
+	err =  xt_register_target(&idletimer_tg);
+	if (err < 0) {
+		pr_debug("couldn't register xt target\n");
+		goto out_dev;
+	}
+
+	return 0;
+out_dev:
+	device_destroy(idletimer_tg_class, MKDEV(0, 0));
+out_class:
+	class_destroy(idletimer_tg_class);
+out:
+	return err;
+}
+
+static void __exit idletimer_tg_exit(void)
+{
+	xt_unregister_target(&idletimer_tg);
+
+	device_destroy(idletimer_tg_class, MKDEV(0, 0));
+	class_destroy(idletimer_tg_class);
+}
+
+module_init(idletimer_tg_init);
+module_exit(idletimer_tg_exit);
+
+MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
+MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
+MODULE_DESCRIPTION("Xtables: idle time monitor");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-58-ga151


From c04ec8063d3a89e5ae933c7b4be04476fe65c6de Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:42 +0000
Subject: netpoll: Set npinfo to NULL even with ndo_netpoll_cleanup

Since we have to NULL npinfo regardless of whether there is a
ndo_netpoll_cleanup, it makes sense to do this unconditionally
in netpoll_cleanup rather than having every driver do it by
themselves.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e034342c819c..19ff66079f76 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -867,8 +867,7 @@ void netpoll_cleanup(struct netpoll *np)
 				ops = np->dev->netdev_ops;
 				if (ops->ndo_netpoll_cleanup)
 					ops->ndo_netpoll_cleanup(np->dev);
-				else
-					np->dev->npinfo = NULL;
+				np->dev->npinfo = NULL;
 			}
 		}
 
-- 
cgit v1.2.3-58-ga151


From 36655042f9873efc2a90d251b9aef9b6b79d75d8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:43 +0000
Subject: bridge: Remove redundant npinfo NULL setting

Now that netpoll always zaps npinfo we no longer need to do it
in bridge.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index b898364beaf5..f8cb908db81f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -231,7 +231,6 @@ void br_netpoll_cleanup(struct net_device *dev)
 	struct net_bridge_port *p, *n;
 	const struct net_device_ops *ops;
 
-	br->dev->npinfo = NULL;
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
 		if (p->dev) {
 			ops = p->dev->netdev_ops;
-- 
cgit v1.2.3-58-ga151


From de85d99eb7b595f6751550184b94c1e2f74a828b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:44 +0000
Subject: netpoll: Fix RCU usage

The use of RCU in netpoll is incorrect in a number of places:

1) The initial setting is lacking a write barrier.
2) The synchronize_rcu is in the wrong place.
3) Read barriers are missing.
4) Some places are even missing rcu_read_lock.
5) npinfo is zeroed after freeing.

This patch fixes those issues.  As most users are in BH context,
this also converts the RCU usage to the BH variant.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 13 ++++++++-----
 net/core/netpoll.c      | 20 ++++++++++++--------
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e9e231215865..95c9f7e16776 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -57,12 +57,15 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 #ifdef CONFIG_NETPOLL
 static inline bool netpoll_rx(struct sk_buff *skb)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll_info *npinfo;
 	unsigned long flags;
 	bool ret = false;
 
+	rcu_read_lock_bh();
+	npinfo = rcu_dereference(skb->dev->npinfo);
+
 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
-		return false;
+		goto out;
 
 	spin_lock_irqsave(&npinfo->rx_lock, flags);
 	/* check rx_flags again with the lock held */
@@ -70,12 +73,14 @@ static inline bool netpoll_rx(struct sk_buff *skb)
 		ret = true;
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
+out:
+	rcu_read_unlock_bh();
 	return ret;
 }
 
 static inline int netpoll_rx_on(struct sk_buff *skb)
 {
-	struct netpoll_info *npinfo = skb->dev->npinfo;
+	struct netpoll_info *npinfo = rcu_dereference(skb->dev->npinfo);
 
 	return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags);
 }
@@ -91,7 +96,6 @@ static inline void *netpoll_poll_lock(struct napi_struct *napi)
 {
 	struct net_device *dev = napi->dev;
 
-	rcu_read_lock(); /* deal with race on ->npinfo */
 	if (dev && dev->npinfo) {
 		spin_lock(&napi->poll_lock);
 		napi->poll_owner = smp_processor_id();
@@ -108,7 +112,6 @@ static inline void netpoll_poll_unlock(void *have)
 		napi->poll_owner = -1;
 		spin_unlock(&napi->poll_lock);
 	}
-	rcu_read_unlock();
 }
 
 #else
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 19ff66079f76..e9ab4f0c454c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -261,6 +261,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 	unsigned long tries;
 	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo = np->dev->npinfo;
 
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
@@ -810,10 +811,7 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	/* last thing to do is link it to the net device structure */
-	ndev->npinfo = npinfo;
-
-	/* avoid racing with NAPI reading npinfo */
-	synchronize_rcu();
+	rcu_assign_pointer(ndev->npinfo, npinfo);
 
 	return 0;
 
@@ -857,6 +855,16 @@ void netpoll_cleanup(struct netpoll *np)
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
 				const struct net_device_ops *ops;
+
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+
+				rcu_assign_pointer(np->dev->npinfo, NULL);
+
+				/* avoid racing with NAPI reading npinfo */
+				synchronize_rcu_bh();
+
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -864,10 +872,6 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				ops = np->dev->netdev_ops;
-				if (ops->ndo_netpoll_cleanup)
-					ops->ndo_netpoll_cleanup(np->dev);
-				np->dev->npinfo = NULL;
 			}
 		}
 
-- 
cgit v1.2.3-58-ga151


From dbaa154178341689faaa08fbf40b94ae5ca1d6c0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:46 +0000
Subject: netpoll: Add locking for netpoll_setup/cleanup

As it stands, netpoll_setup and netpoll_cleanup have no locking
protection whatsoever.  So chaos ensures if two entities try to
perform them on the same device.

This patch adds RTNL to the equation.  The code has been rearranged so
that bits that do not need RTNL protection are now moved to the top of
netpoll_setup.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 151 +++++++++++++++++++++++++++--------------------------
 1 file changed, 76 insertions(+), 75 deletions(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e9ab4f0c454c..d10c249bcc8f 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -698,7 +698,6 @@ int netpoll_setup(struct netpoll *np)
 	struct net_device *ndev = NULL;
 	struct in_device *in_dev;
 	struct netpoll_info *npinfo;
-	struct netpoll *npe, *tmp;
 	unsigned long flags;
 	int err;
 
@@ -710,38 +709,6 @@ int netpoll_setup(struct netpoll *np)
 		return -ENODEV;
 	}
 
-	np->dev = ndev;
-	if (!ndev->npinfo) {
-		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
-		if (!npinfo) {
-			err = -ENOMEM;
-			goto put;
-		}
-
-		npinfo->rx_flags = 0;
-		INIT_LIST_HEAD(&npinfo->rx_np);
-
-		spin_lock_init(&npinfo->rx_lock);
-		skb_queue_head_init(&npinfo->arp_tx);
-		skb_queue_head_init(&npinfo->txq);
-		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
-
-		atomic_set(&npinfo->refcnt, 1);
-	} else {
-		npinfo = ndev->npinfo;
-		atomic_inc(&npinfo->refcnt);
-	}
-
-	npinfo->netpoll = np;
-
-	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
-	    !ndev->netdev_ops->ndo_poll_controller) {
-		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
-		       np->name, np->dev_name);
-		err = -ENOTSUPP;
-		goto release;
-	}
-
 	if (!netif_running(ndev)) {
 		unsigned long atmost, atleast;
 
@@ -755,7 +722,7 @@ int netpoll_setup(struct netpoll *np)
 		if (err) {
 			printk(KERN_ERR "%s: failed to open %s\n",
 			       np->name, ndev->name);
-			goto release;
+			goto put;
 		}
 
 		atleast = jiffies + HZ/10;
@@ -792,7 +759,7 @@ int netpoll_setup(struct netpoll *np)
 			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
 			       np->name, np->dev_name);
 			err = -EDESTADDRREQ;
-			goto release;
+			goto put;
 		}
 
 		np->local_ip = in_dev->ifa_list->ifa_local;
@@ -800,6 +767,43 @@ int netpoll_setup(struct netpoll *np)
 		printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
 	}
 
+	np->dev = ndev;
+
+	/* fill up the skb queue */
+	refill_skbs();
+
+	rtnl_lock();
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+		       np->name, np->dev_name);
+		err = -ENOTSUPP;
+		goto unlock;
+	}
+
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo) {
+			err = -ENOMEM;
+			goto unlock;
+		}
+
+		npinfo->rx_flags = 0;
+		INIT_LIST_HEAD(&npinfo->rx_np);
+
+		spin_lock_init(&npinfo->rx_lock);
+		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->txq);
+		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+		atomic_set(&npinfo->refcnt, 1);
+	} else {
+		npinfo = ndev->npinfo;
+		atomic_inc(&npinfo->refcnt);
+	}
+
+	npinfo->netpoll = np;
+
 	if (np->rx_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
 		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
@@ -807,24 +811,14 @@ int netpoll_setup(struct netpoll *np)
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
 
-	/* fill up the skb queue */
-	refill_skbs();
-
 	/* last thing to do is link it to the net device structure */
 	rcu_assign_pointer(ndev->npinfo, npinfo);
+	rtnl_unlock();
 
 	return 0;
 
- release:
-	if (!ndev->npinfo) {
-		spin_lock_irqsave(&npinfo->rx_lock, flags);
-		list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) {
-			npe->dev = NULL;
-		}
-		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-
-		kfree(npinfo);
-	}
+unlock:
+	rtnl_unlock();
 put:
 	dev_put(ndev);
 	return err;
@@ -841,43 +835,50 @@ void netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
 	unsigned long flags;
+	int free = 0;
 
-	if (np->dev) {
-		npinfo = np->dev->npinfo;
-		if (npinfo) {
-			if (!list_empty(&npinfo->rx_np)) {
-				spin_lock_irqsave(&npinfo->rx_lock, flags);
-				list_del(&np->rx);
-				if (list_empty(&npinfo->rx_np))
-					npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
-				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-			}
+	if (!np->dev)
+		return;
 
-			if (atomic_dec_and_test(&npinfo->refcnt)) {
-				const struct net_device_ops *ops;
+	rtnl_lock();
+	npinfo = np->dev->npinfo;
+	if (npinfo) {
+		if (!list_empty(&npinfo->rx_np)) {
+			spin_lock_irqsave(&npinfo->rx_lock, flags);
+			list_del(&np->rx);
+			if (list_empty(&npinfo->rx_np))
+				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+			spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+		}
 
-				ops = np->dev->netdev_ops;
-				if (ops->ndo_netpoll_cleanup)
-					ops->ndo_netpoll_cleanup(np->dev);
+		free = atomic_dec_and_test(&npinfo->refcnt);
+		if (free) {
+			const struct net_device_ops *ops;
 
-				rcu_assign_pointer(np->dev->npinfo, NULL);
+			ops = np->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(np->dev);
 
-				/* avoid racing with NAPI reading npinfo */
-				synchronize_rcu_bh();
+			rcu_assign_pointer(np->dev->npinfo, NULL);
+		}
+	}
+	rtnl_unlock();
 
-				skb_queue_purge(&npinfo->arp_tx);
-				skb_queue_purge(&npinfo->txq);
-				cancel_rearming_delayed_work(&npinfo->tx_work);
+	if (free) {
+		/* avoid racing with NAPI reading npinfo */
+		synchronize_rcu_bh();
 
-				/* clean after last, unfinished work */
-				__skb_queue_purge(&npinfo->txq);
-				kfree(npinfo);
-			}
-		}
+		skb_queue_purge(&npinfo->arp_tx);
+		skb_queue_purge(&npinfo->txq);
+		cancel_rearming_delayed_work(&npinfo->tx_work);
 
-		dev_put(np->dev);
+		/* clean after last, unfinished work */
+		__skb_queue_purge(&npinfo->txq);
+		kfree(npinfo);
 	}
 
+	dev_put(np->dev);
+
 	np->dev = NULL;
 }
 
-- 
cgit v1.2.3-58-ga151


From 4247e161b12f8dffb7ee3ee07bc5e61f714ebe2d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:47 +0000
Subject: netpoll: Add ndo_netpoll_setup

This patch adds ndo_netpoll_setup as the initialisation primitive
to complement ndo_netpoll_cleanup.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/netpoll.c        | 10 ++++++++++
 2 files changed, 12 insertions(+)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4fbccc5f609a..fb20cc55ba52 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -744,6 +744,8 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	int			(*ndo_netpoll_setup)(struct net_device *dev,
+						     struct netpoll_info *info);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index d10c249bcc8f..7de6dcad5d79 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -698,6 +698,7 @@ int netpoll_setup(struct netpoll *np)
 	struct net_device *ndev = NULL;
 	struct in_device *in_dev;
 	struct netpoll_info *npinfo;
+	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
@@ -797,6 +798,13 @@ int netpoll_setup(struct netpoll *np)
 		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
 
 		atomic_set(&npinfo->refcnt, 1);
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_setup) {
+			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			if (err)
+				goto free_npinfo;
+		}
 	} else {
 		npinfo = ndev->npinfo;
 		atomic_inc(&npinfo->refcnt);
@@ -817,6 +825,8 @@ int netpoll_setup(struct netpoll *np)
 
 	return 0;
 
+free_npinfo:
+	kfree(npinfo);
 unlock:
 	rtnl_unlock();
 put:
-- 
cgit v1.2.3-58-ga151


From 8fdd95ec162a8fbac7f41d6f54f90402fe3e8cb1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:48 +0000
Subject: netpoll: Allow netpoll_setup/cleanup recursion

This patch adds the functions __netpoll_setup/__netpoll_cleanup
which is designed to be called recursively through ndo_netpoll_seutp.

They must be called with RTNL held, and the caller must initialise
np->dev and ensure that it has a valid reference count.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |   2 +
 net/core/netpoll.c      | 176 ++++++++++++++++++++++++++----------------------
 2 files changed, 99 insertions(+), 79 deletions(-)

(limited to 'net')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 95c9f7e16776..f3ad74af7e1f 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -46,9 +46,11 @@ void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
 int netpoll_parse_options(struct netpoll *np, char *opt);
+int __netpoll_setup(struct netpoll *np);
 int netpoll_setup(struct netpoll *np);
 int netpoll_trap(void);
 void netpoll_set_trap(int trap);
+void __netpoll_cleanup(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
 void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 7de6dcad5d79..560297ee55b4 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -693,15 +693,78 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	return -1;
 }
 
-int netpoll_setup(struct netpoll *np)
+int __netpoll_setup(struct netpoll *np)
 {
-	struct net_device *ndev = NULL;
-	struct in_device *in_dev;
+	struct net_device *ndev = np->dev;
 	struct netpoll_info *npinfo;
 	const struct net_device_ops *ops;
 	unsigned long flags;
 	int err;
 
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
+		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
+		       np->name, np->dev_name);
+		err = -ENOTSUPP;
+		goto out;
+	}
+
+	if (!ndev->npinfo) {
+		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
+		if (!npinfo) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		npinfo->rx_flags = 0;
+		INIT_LIST_HEAD(&npinfo->rx_np);
+
+		spin_lock_init(&npinfo->rx_lock);
+		skb_queue_head_init(&npinfo->arp_tx);
+		skb_queue_head_init(&npinfo->txq);
+		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
+
+		atomic_set(&npinfo->refcnt, 1);
+
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_setup) {
+			err = ops->ndo_netpoll_setup(ndev, npinfo);
+			if (err)
+				goto free_npinfo;
+		}
+	} else {
+		npinfo = ndev->npinfo;
+		atomic_inc(&npinfo->refcnt);
+	}
+
+	npinfo->netpoll = np;
+
+	if (np->rx_hook) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
+		list_add_tail(&np->rx, &npinfo->rx_np);
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
+
+	/* last thing to do is link it to the net device structure */
+	rcu_assign_pointer(ndev->npinfo, npinfo);
+	rtnl_unlock();
+
+	return 0;
+
+free_npinfo:
+	kfree(npinfo);
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(__netpoll_setup);
+
+int netpoll_setup(struct netpoll *np)
+{
+	struct net_device *ndev = NULL;
+	struct in_device *in_dev;
+	int err;
+
 	if (np->dev_name)
 		ndev = dev_get_by_name(&init_net, np->dev_name);
 	if (!ndev) {
@@ -774,61 +837,14 @@ int netpoll_setup(struct netpoll *np)
 	refill_skbs();
 
 	rtnl_lock();
-	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
-	    !ndev->netdev_ops->ndo_poll_controller) {
-		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
-		       np->name, np->dev_name);
-		err = -ENOTSUPP;
-		goto unlock;
-	}
-
-	if (!ndev->npinfo) {
-		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
-		if (!npinfo) {
-			err = -ENOMEM;
-			goto unlock;
-		}
-
-		npinfo->rx_flags = 0;
-		INIT_LIST_HEAD(&npinfo->rx_np);
-
-		spin_lock_init(&npinfo->rx_lock);
-		skb_queue_head_init(&npinfo->arp_tx);
-		skb_queue_head_init(&npinfo->txq);
-		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);
-
-		atomic_set(&npinfo->refcnt, 1);
-
-		ops = np->dev->netdev_ops;
-		if (ops->ndo_netpoll_setup) {
-			err = ops->ndo_netpoll_setup(ndev, npinfo);
-			if (err)
-				goto free_npinfo;
-		}
-	} else {
-		npinfo = ndev->npinfo;
-		atomic_inc(&npinfo->refcnt);
-	}
-
-	npinfo->netpoll = np;
-
-	if (np->rx_hook) {
-		spin_lock_irqsave(&npinfo->rx_lock, flags);
-		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
-		list_add_tail(&np->rx, &npinfo->rx_np);
-		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-	}
-
-	/* last thing to do is link it to the net device structure */
-	rcu_assign_pointer(ndev->npinfo, npinfo);
+	err = __netpoll_setup(np);
 	rtnl_unlock();
 
+	if (err)
+		goto put;
+
 	return 0;
 
-free_npinfo:
-	kfree(npinfo);
-unlock:
-	rtnl_unlock();
 put:
 	dev_put(ndev);
 	return err;
@@ -841,40 +857,32 @@ static int __init netpoll_init(void)
 }
 core_initcall(netpoll_init);
 
-void netpoll_cleanup(struct netpoll *np)
+void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
 	unsigned long flags;
-	int free = 0;
 
-	if (!np->dev)
+	npinfo = np->dev->npinfo;
+	if (!npinfo)
 		return;
 
-	rtnl_lock();
-	npinfo = np->dev->npinfo;
-	if (npinfo) {
-		if (!list_empty(&npinfo->rx_np)) {
-			spin_lock_irqsave(&npinfo->rx_lock, flags);
-			list_del(&np->rx);
-			if (list_empty(&npinfo->rx_np))
-				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
-			spin_unlock_irqrestore(&npinfo->rx_lock, flags);
-		}
+	if (!list_empty(&npinfo->rx_np)) {
+		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		list_del(&np->rx);
+		if (list_empty(&npinfo->rx_np))
+			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
+		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
+	}
 
-		free = atomic_dec_and_test(&npinfo->refcnt);
-		if (free) {
-			const struct net_device_ops *ops;
+	if (atomic_dec_and_test(&npinfo->refcnt)) {
+		const struct net_device_ops *ops;
 
-			ops = np->dev->netdev_ops;
-			if (ops->ndo_netpoll_cleanup)
-				ops->ndo_netpoll_cleanup(np->dev);
+		ops = np->dev->netdev_ops;
+		if (ops->ndo_netpoll_cleanup)
+			ops->ndo_netpoll_cleanup(np->dev);
 
-			rcu_assign_pointer(np->dev->npinfo, NULL);
-		}
-	}
-	rtnl_unlock();
+		rcu_assign_pointer(np->dev->npinfo, NULL);
 
-	if (free) {
 		/* avoid racing with NAPI reading npinfo */
 		synchronize_rcu_bh();
 
@@ -886,9 +894,19 @@ void netpoll_cleanup(struct netpoll *np)
 		__skb_queue_purge(&npinfo->txq);
 		kfree(npinfo);
 	}
+}
+EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-	dev_put(np->dev);
+void netpoll_cleanup(struct netpoll *np)
+{
+	if (!np->dev)
+		return;
 
+	rtnl_lock();
+	__netpoll_cleanup(np);
+	rtnl_unlock();
+
+	dev_put(np->dev);
 	np->dev = NULL;
 }
 
-- 
cgit v1.2.3-58-ga151


From 91d2c34a4eed32876ca333b0ca44f3bc56645805 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 10 Jun 2010 16:12:50 +0000
Subject: bridge: Fix netpoll support

There are multiple problems with the newly added netpoll support:

1) Use-after-free on each netpoll packet.
2) Invoking unsafe code on netpoll/IRQ path.
3) Breaks when netpoll is enabled on the underlying device.

This patch fixes all of these problems.  In particular, we now
allocate proper netpoll structures for each underlying device.

We only allow netpoll to be enabled on the bridge when all the
devices underneath it support netpoll.  Once it is enabled, we
do not allow non-netpoll devices to join the bridge (until netpoll
is disabled again).

This allows us to do away with the npinfo juggling that caused
problem number 1.

Incidentally this patch fixes number 2 by bypassing unsafe code
such as multicast snooping and netfilter.

Reported-by: Qianfeng Zhang <frzhang@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 108 +++++++++++++++++++++++++++---------------------
 net/bridge/br_forward.c |  34 ++++++---------
 net/bridge/br_if.c      |  16 +++----
 net/bridge/br_private.h |  46 +++++++++++++++++----
 4 files changed, 120 insertions(+), 84 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f8cb908db81f..6f3a9279be30 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -47,6 +47,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_pull(skb, ETH_HLEN);
 
 	if (is_multicast_ether_addr(dest)) {
+		if (unlikely(netpoll_tx_running(dev))) {
+			br_flood_deliver(br, skb);
+			goto out;
+		}
 		if (br_multicast_rcv(br, NULL, skb))
 			goto out;
 
@@ -199,72 +203,81 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static bool br_devices_support_netpoll(struct net_bridge *br)
+static void br_poll_controller(struct net_device *br_dev)
 {
-	struct net_bridge_port *p;
-	bool ret = true;
-	int count = 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&br->lock, flags);
-	list_for_each_entry(p, &br->port_list, list) {
-		count++;
-		if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
-		    !p->dev->netdev_ops->ndo_poll_controller)
-			ret = false;
-	}
-	spin_unlock_irqrestore(&br->lock, flags);
-	return count != 0 && ret;
 }
 
-static void br_poll_controller(struct net_device *br_dev)
+static void br_netpoll_cleanup(struct net_device *dev)
 {
-	struct netpoll *np = br_dev->npinfo->netpoll;
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_port *p, *n;
 
-	if (np->real_dev != br_dev)
-		netpoll_poll_dev(np->real_dev);
+	list_for_each_entry_safe(p, n, &br->port_list, list) {
+		br_netpoll_disable(p);
+	}
 }
 
-void br_netpoll_cleanup(struct net_device *dev)
+static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni)
 {
 	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p, *n;
-	const struct net_device_ops *ops;
+	int err = 0;
 
 	list_for_each_entry_safe(p, n, &br->port_list, list) {
-		if (p->dev) {
-			ops = p->dev->netdev_ops;
-			if (ops->ndo_netpoll_cleanup)
-				ops->ndo_netpoll_cleanup(p->dev);
-			else
-				p->dev->npinfo = NULL;
-		}
+		if (!p->dev)
+			continue;
+
+		err = br_netpoll_enable(p);
+		if (err)
+			goto fail;
 	}
+
+out:
+	return err;
+
+fail:
+	br_netpoll_cleanup(dev);
+	goto out;
 }
 
-void br_netpoll_disable(struct net_bridge *br,
-			struct net_device *dev)
+int br_netpoll_enable(struct net_bridge_port *p)
 {
-	if (br_devices_support_netpoll(br))
-		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
-	if (dev->netdev_ops->ndo_netpoll_cleanup)
-		dev->netdev_ops->ndo_netpoll_cleanup(dev);
-	else
-		dev->npinfo = NULL;
+	struct netpoll *np;
+	int err = 0;
+
+	np = kzalloc(sizeof(*p->np), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!np)
+		goto out;
+
+	np->dev = p->dev;
+
+	err = __netpoll_setup(np);
+	if (err) {
+		kfree(np);
+		goto out;
+	}
+
+	p->np = np;
+
+out:
+	return err;
 }
 
-void br_netpoll_enable(struct net_bridge *br,
-		       struct net_device *dev)
+void br_netpoll_disable(struct net_bridge_port *p)
 {
-	if (br_devices_support_netpoll(br)) {
-		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
-		if (br->dev->npinfo)
-			dev->npinfo = br->dev->npinfo;
-	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
-		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
-		br_info(br,"new device %s does not support netpoll (disabling)",
-			dev->name);
-	}
+	struct netpoll *np = p->np;
+
+	if (!np)
+		return;
+
+	p->np = NULL;
+
+	/* Wait for transmitting packets to finish before freeing. */
+	synchronize_rcu_bh();
+
+	__netpoll_cleanup(np);
+	kfree(np);
 }
 
 #endif
@@ -293,6 +306,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_setup	 = br_netpoll_setup,
 	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
 	.ndo_poll_controller	 = br_poll_controller,
 #endif
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a98ef1393097..6e97711fd2c5 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 			kfree_skb(skb);
 		else {
 			skb_push(skb, ETH_HLEN);
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-			if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
-				netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
-				skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
-			} else
-#endif
-				dev_queue_xmit(skb);
+			dev_queue_xmit(skb);
 		}
 	}
 
@@ -73,23 +66,20 @@ int br_forward_finish(struct sk_buff *skb)
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	struct net_bridge *br = to->br;
-	if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
-		struct netpoll *np;
-		to->dev->npinfo = skb->dev->npinfo;
-		np = skb->dev->npinfo->netpoll;
-		np->real_dev = np->dev = to->dev;
-		to->dev->priv_flags |= IFF_IN_NETPOLL;
-	}
-#endif
 	skb->dev = to->dev;
+
+	if (unlikely(netpoll_tx_running(to->dev))) {
+		if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
+			kfree_skb(skb);
+		else {
+			skb_push(skb, ETH_HLEN);
+			br_netpoll_send_skb(to, skb);
+		}
+		return;
+	}
+
 	NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 		br_forward_finish);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	if (skb->dev->npinfo)
-		skb->dev->npinfo->netpoll->dev = br->dev;
-#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index d9242342837e..97ac9da4d76c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -155,7 +155,8 @@ static void del_nbp(struct net_bridge_port *p)
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 
-	br_netpoll_disable(br, dev);
+	br_netpoll_disable(p);
+
 	call_rcu(&p->rcu, destroy_nbp_rcu);
 }
 
@@ -168,8 +169,6 @@ static void del_br(struct net_bridge *br, struct list_head *head)
 		del_nbp(p);
 	}
 
-	br_netpoll_cleanup(br->dev);
-
 	del_timer_sync(&br->gc_timer);
 
 	br_sysfs_delbr(br->dev);
@@ -429,11 +428,14 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (err)
 		goto err2;
 
+	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
+		goto err3;
+
 	rcu_assign_pointer(dev->br_port, p);
 
 	err = netdev_rx_handler_register(dev, br_handle_frame);
 	if (err)
-		goto err3;
+		goto err4;
 
 	dev_disable_lro(dev);
 
@@ -454,11 +456,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
-	br_netpoll_enable(br, dev);
-
 	return 0;
-err3:
+err4:
 	rcu_assign_pointer(dev->br_port, NULL);
+err3:
+	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
 	br_fdb_delete_by_port(br, p, 1);
 err1:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c83519b555bb..0f5394c4f2f1 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -15,6 +15,7 @@
 
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
+#include <linux/netpoll.h>
 #include <net/route.h>
 
 #define BR_HASH_BITS 8
@@ -143,6 +144,10 @@ struct net_bridge_port
 #ifdef CONFIG_SYSFS
 	char				sysfs_name[IFNAMSIZ];
 #endif
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll			*np;
+#endif
 };
 
 struct br_cpu_netstats {
@@ -273,16 +278,41 @@ extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
 #ifdef CONFIG_NET_POLL_CONTROLLER
-extern void br_netpoll_cleanup(struct net_device *dev);
-extern void br_netpoll_enable(struct net_bridge *br,
-			      struct net_device *dev);
-extern void br_netpoll_disable(struct net_bridge *br,
-			       struct net_device *dev);
+static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
+{
+	return br->dev->npinfo;
+}
+
+static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
+				       struct sk_buff *skb)
+{
+	struct netpoll *np = p->np;
+
+	if (np)
+		netpoll_send_skb(np, skb);
+}
+
+extern int br_netpoll_enable(struct net_bridge_port *p);
+extern void br_netpoll_disable(struct net_bridge_port *p);
 #else
-#define br_netpoll_cleanup(br)
-#define br_netpoll_enable(br, dev)
-#define br_netpoll_disable(br, dev)
+static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
+{
+	return NULL;
+}
+
+static inline void br_netpoll_send_skb(struct net_bridge_port *p,
+				       struct sk_buff *skb)
+{
+}
 
+static inline int br_netpoll_enable(struct net_bridge_port *p)
+{
+	return 0;
+}
+
+static inline void br_netpoll_disable(struct net_bridge_port *p)
+{
+}
 #endif
 
 /* br_fdb.c */
-- 
cgit v1.2.3-58-ga151


From 93e2c32b5cb2ad92ceb1d7a4684f20a0d25bf530 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 10 Jun 2010 03:34:59 +0000
Subject: net: add rx_handler data pointer

Add possibility to register rx_handler data pointer along with a rx_handler.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     | 2 +-
 include/linux/netdevice.h | 4 +++-
 net/bridge/br_if.c        | 2 +-
 net/core/dev.c            | 6 +++++-
 4 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 59c315556a30..87a3bf69c4a3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -532,7 +532,7 @@ static int macvlan_port_create(struct net_device *dev)
 		INIT_HLIST_HEAD(&port->vlan_hash[i]);
 	rcu_assign_pointer(dev->macvlan_port, port);
 
-	err = netdev_rx_handler_register(dev, macvlan_handle_frame);
+	err = netdev_rx_handler_register(dev, macvlan_handle_frame, NULL);
 	if (err) {
 		rcu_assign_pointer(dev->macvlan_port, NULL);
 		kfree(port);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fb20cc55ba52..361ff1145cf1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -979,6 +979,7 @@ struct net_device {
 
 	struct netdev_queue	rx_queue;
 	rx_handler_func_t	*rx_handler;
+	void			*rx_handler_data;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
 
@@ -1712,7 +1713,8 @@ static inline void napi_free_frags(struct napi_struct *napi)
 }
 
 extern int netdev_rx_handler_register(struct net_device *dev,
-				      rx_handler_func_t *rx_handler);
+				      rx_handler_func_t *rx_handler,
+				      void *rx_handler_data);
 extern void netdev_rx_handler_unregister(struct net_device *dev);
 
 extern void		netif_nit_deliver(struct sk_buff *skb);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 97ac9da4d76c..0d142ed0bbe3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -433,7 +433,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	rcu_assign_pointer(dev->br_port, p);
 
-	err = netdev_rx_handler_register(dev, br_handle_frame);
+	err = netdev_rx_handler_register(dev, br_handle_frame, NULL);
 	if (err)
 		goto err4;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a1abc10db08a..abdb19e547a7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2703,6 +2703,7 @@ void netif_nit_deliver(struct sk_buff *skb)
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for
  *	@rx_handler: receive handler to register
+ *	@rx_handler_data: data pointer that is used by rx handler
  *
  *	Register a receive hander for a device. This handler will then be
  *	called from __netif_receive_skb. A negative errno code is returned
@@ -2711,13 +2712,15 @@ void netif_nit_deliver(struct sk_buff *skb)
  *	The caller must hold the rtnl_mutex.
  */
 int netdev_rx_handler_register(struct net_device *dev,
-			       rx_handler_func_t *rx_handler)
+			       rx_handler_func_t *rx_handler,
+			       void *rx_handler_data)
 {
 	ASSERT_RTNL();
 
 	if (dev->rx_handler)
 		return -EBUSY;
 
+	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
 	rcu_assign_pointer(dev->rx_handler, rx_handler);
 
 	return 0;
@@ -2737,6 +2740,7 @@ void netdev_rx_handler_unregister(struct net_device *dev)
 
 	ASSERT_RTNL();
 	rcu_assign_pointer(dev->rx_handler, NULL);
+	rcu_assign_pointer(dev->rx_handler_data, NULL);
 }
 EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
 
-- 
cgit v1.2.3-58-ga151


From f350a0a87374418635689471606454abc7beaa3a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 15 Jun 2010 06:50:45 +0000
Subject: bridge: use rx_handler_data pointer to store net_bridge_port pointer

Register net_bridge_port pointer as rx_handler data pointer. As br_port is
removed from struct net_device, another netdev priv_flag is added to indicate
the device serves as a bridge port. Also rcuized pointers are now correctly
dereferenced in br_fdb.c and in netfilter parts.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ksz884x.c                       |  2 +-
 drivers/staging/batman-adv/hard-interface.c |  2 +-
 include/linux/if.h                          |  1 +
 include/linux/netdevice.h                   |  2 --
 net/bridge/br_fdb.c                         |  4 ++--
 net/bridge/br_if.c                          | 23 +++++++++++++----------
 net/bridge/br_input.c                       |  9 ++++-----
 net/bridge/br_netfilter.c                   | 11 ++++++-----
 net/bridge/br_netlink.c                     |  9 +++++----
 net/bridge/br_notify.c                      |  5 +++--
 net/bridge/br_private.h                     |  5 +++++
 net/bridge/br_stp_bpdu.c                    |  5 +++--
 net/bridge/netfilter/ebt_redirect.c         |  3 ++-
 net/bridge/netfilter/ebt_ulog.c             |  8 +++++---
 net/bridge/netfilter/ebtables.c             | 11 +++++++----
 net/core/dev.c                              |  3 ++-
 net/netfilter/nfnetlink_log.c               |  6 ++++--
 net/netfilter/nfnetlink_queue.c             |  6 ++++--
 net/wireless/nl80211.c                      |  2 +-
 net/wireless/util.c                         |  4 ++--
 20 files changed, 71 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c
index 7805bbf1d53a..62362b4a8c56 100644
--- a/drivers/net/ksz884x.c
+++ b/drivers/net/ksz884x.c
@@ -5718,7 +5718,7 @@ static void dev_set_promiscuous(struct net_device *dev, struct dev_priv *priv,
 		 * from the bridge.
 		 */
 		if ((hw->features & STP_SUPPORT) && !promiscuous &&
-				dev->br_port) {
+		    (dev->priv_flags & IFF_BRIDGE_PORT)) {
 			struct ksz_switch *sw = hw->ksz_switch;
 			int port = priv->port.first_port;
 
diff --git a/drivers/staging/batman-adv/hard-interface.c b/drivers/staging/batman-adv/hard-interface.c
index 7a582e80de18..5ede9c255094 100644
--- a/drivers/staging/batman-adv/hard-interface.c
+++ b/drivers/staging/batman-adv/hard-interface.c
@@ -71,7 +71,7 @@ static int is_valid_iface(struct net_device *net_dev)
 #endif
 
 	/* Device is being bridged */
-	/* if (net_dev->br_port != NULL)
+	/* if (net_dev->priv_flags & IFF_BRIDGE_PORT)
 		return 0; */
 
 	return 1;
diff --git a/include/linux/if.h b/include/linux/if.h
index 31f2e27ebcd0..53558ec59e1b 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -74,6 +74,7 @@
 #define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
 #define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 #define IFF_MACVLAN_PORT	0x4000	/* device used as macvlan port */
+#define IFF_BRIDGE_PORT	0x8000		/* device used as bridge port */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5f231de2032f..a7e0458029b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1047,8 +1047,6 @@ struct net_device {
 	/* mid-layer private */
 	void			*ml_priv;
 
-	/* bridge stuff */
-	struct net_bridge_port	*br_port;
 	/* GARP */
 	struct garp_port	*garp_port;
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 26637439965b..6818e609b2c0 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -242,11 +242,11 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
 	struct net_bridge_fdb_entry *fdb;
 	int ret;
 
-	if (!dev->br_port)
+	if (!br_port_exists(dev))
 		return 0;
 
 	rcu_read_lock();
-	fdb = __br_fdb_get(dev->br_port->br, addr);
+	fdb = __br_fdb_get(br_port_get_rcu(dev)->br, addr);
 	ret = fdb && fdb->dst->dev != dev &&
 		fdb->dst->state == BR_STATE_FORWARDING;
 	rcu_read_unlock();
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 0d142ed0bbe3..c03d2c3ff03e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -147,8 +147,9 @@ static void del_nbp(struct net_bridge_port *p)
 
 	list_del_rcu(&p->list);
 
+	dev->priv_flags &= ~IFF_BRIDGE_PORT;
+
 	netdev_rx_handler_unregister(dev);
-	rcu_assign_pointer(dev->br_port, NULL);
 
 	br_multicast_del_port(p);
 
@@ -400,7 +401,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 		return -ELOOP;
 
 	/* Device is already being bridged */
-	if (dev->br_port != NULL)
+	if (br_port_exists(dev))
 		return -EBUSY;
 
 	/* No bridging devices that dislike that (e.g. wireless) */
@@ -431,11 +432,11 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	if (br_netpoll_info(br) && ((err = br_netpoll_enable(p))))
 		goto err3;
 
-	rcu_assign_pointer(dev->br_port, p);
-
-	err = netdev_rx_handler_register(dev, br_handle_frame, NULL);
+	err = netdev_rx_handler_register(dev, br_handle_frame, p);
 	if (err)
-		goto err4;
+		goto err3;
+
+	dev->priv_flags |= IFF_BRIDGE_PORT;
 
 	dev_disable_lro(dev);
 
@@ -457,8 +458,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
 	return 0;
-err4:
-	rcu_assign_pointer(dev->br_port, NULL);
 err3:
 	sysfs_remove_link(br->ifobj, p->dev->name);
 err2:
@@ -477,9 +476,13 @@ put_back:
 /* called with RTNL */
 int br_del_if(struct net_bridge *br, struct net_device *dev)
 {
-	struct net_bridge_port *p = dev->br_port;
+	struct net_bridge_port *p;
+
+	if (!br_port_exists(dev))
+		return -EINVAL;
 
-	if (!p || p->br != br)
+	p = br_port_get(dev);
+	if (p->br != br)
 		return -EINVAL;
 
 	del_nbp(p);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 99647d8f95c8..f076c9d79d5e 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -41,7 +41,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
 int br_handle_frame_finish(struct sk_buff *skb)
 {
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	struct net_bridge *br;
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
@@ -111,10 +111,9 @@ drop:
 /* note: already called with rcu_read_lock (preempt_disabled) */
 static int br_handle_local_finish(struct sk_buff *skb)
 {
-	struct net_bridge_port *p = rcu_dereference(skb->dev->br_port);
+	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 
-	if (p)
-		br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
+	br_fdb_update(p->br, p, eth_hdr(skb)->h_source);
 	return 0;	 /* process further */
 }
 
@@ -151,7 +150,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
 	if (!skb)
 		return NULL;
 
-	p = rcu_dereference(skb->dev->br_port);
+	p = br_port_get_rcu(skb->dev);
 
 	if (unlikely(is_link_local(dest))) {
 		/* Pause frames shouldn't be passed up by driver anyway */
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 0685b2558ab5..f54404ddee53 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -127,16 +127,17 @@ void br_netfilter_rtable_init(struct net_bridge *br)
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
-	struct net_bridge_port *port = rcu_dereference(dev->br_port);
-
-	return port ? &port->br->fake_rtable : NULL;
+	if (!br_port_exists(dev))
+		return NULL;
+	return &br_port_get_rcu(dev)->br->fake_rtable;
 }
 
 static inline struct net_device *bridge_parent(const struct net_device *dev)
 {
-	struct net_bridge_port *port = rcu_dereference(dev->br_port);
+	if (!br_port_exists(dev))
+		return NULL;
 
-	return port ? port->br->dev : NULL;
+	return br_port_get_rcu(dev)->br->dev;
 }
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index fe0a79018ab2..4a6a378c84e3 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -120,10 +120,11 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
 	idx = 0;
 	for_each_netdev(net, dev) {
 		/* not a bridge port */
-		if (dev->br_port == NULL || idx < cb->args[0])
+		if (!br_port_exists(dev) || idx < cb->args[0])
 			goto skip;
 
-		if (br_fill_ifinfo(skb, dev->br_port, NETLINK_CB(cb->skb).pid,
+		if (br_fill_ifinfo(skb, br_port_get(dev),
+				   NETLINK_CB(cb->skb).pid,
 				   cb->nlh->nlmsg_seq, RTM_NEWLINK,
 				   NLM_F_MULTI) < 0)
 			break;
@@ -168,9 +169,9 @@ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
 	if (!dev)
 		return -ENODEV;
 
-	p = dev->br_port;
-	if (!p)
+	if (!br_port_exists(dev))
 		return -EINVAL;
+	p = br_port_get(dev);
 
 	/* if kernel STP is running, don't allow changes */
 	if (p->br->stp_enabled == BR_KERNEL_STP)
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 717e1fd6133c..404d4e14c6a7 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -32,14 +32,15 @@ struct notifier_block br_device_notifier = {
 static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
 {
 	struct net_device *dev = ptr;
-	struct net_bridge_port *p = dev->br_port;
+	struct net_bridge_port *p = br_port_get(dev);
 	struct net_bridge *br;
 	int err;
 
 	/* not a port of a bridge */
-	if (p == NULL)
+	if (!br_port_exists(dev))
 		return NOTIFY_DONE;
 
+	p = br_port_get(dev);
 	br = p->br;
 
 	switch (event) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 0f5394c4f2f1..f6bc979b1135 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -150,6 +150,11 @@ struct net_bridge_port
 #endif
 };
 
+#define br_port_get_rcu(dev) \
+	((struct net_bridge_port *) rcu_dereference(dev->rx_handler_data))
+#define br_port_get(dev) ((struct net_bridge_port *) dev->rx_handler_data)
+#define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
+
 struct br_cpu_netstats {
 	unsigned long	rx_packets;
 	unsigned long	rx_bytes;
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 217bd225a42f..70aecb48fb69 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -137,12 +137,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb,
 		struct net_device *dev)
 {
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
-	struct net_bridge_port *p = rcu_dereference(dev->br_port);
+	struct net_bridge_port *p;
 	struct net_bridge *br;
 	const unsigned char *buf;
 
-	if (!p)
+	if (!br_port_exists(dev))
 		goto err;
+	p = br_port_get_rcu(dev);
 
 	if (!pskb_may_pull(skb, 4))
 		goto err;
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9e19166ba453..46624bb6d9be 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -24,8 +24,9 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		return EBT_DROP;
 
 	if (par->hooknum != NF_BR_BROUTING)
+		/* rcu_read_lock()ed by nf_hook_slow */
 		memcpy(eth_hdr(skb)->h_dest,
-		       par->in->br_port->br->dev->dev_addr, ETH_ALEN);
+		       br_port_get_rcu(par->in)->br->dev->dev_addr, ETH_ALEN);
 	else
 		memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN);
 	skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index ae3c7cef1484..26377e96fa1c 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -177,8 +177,9 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	if (in) {
 		strcpy(pm->physindev, in->name);
 		/* If in isn't a bridge, then physindev==indev */
-		if (in->br_port)
-			strcpy(pm->indev, in->br_port->br->dev->name);
+		if (br_port_exists(in))
+			/* rcu_read_lock()ed by nf_hook_slow */
+			strcpy(pm->indev, br_port_get_rcu(in)->br->dev->name);
 		else
 			strcpy(pm->indev, in->name);
 	} else
@@ -187,7 +188,8 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 	if (out) {
 		/* If out exists, then out is a bridge port */
 		strcpy(pm->physoutdev, out->name);
-		strcpy(pm->outdev, out->br_port->br->dev->name);
+		/* rcu_read_lock()ed by nf_hook_slow */
+		strcpy(pm->outdev, br_port_get_rcu(out)->br->dev->name);
 	} else
 		pm->outdev[0] = pm->physoutdev[0] = '\0';
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 59ca00e40dec..bcc102e3be4d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -140,11 +140,14 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h,
 		return 1;
 	if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT))
 		return 1;
-	if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN))
+	/* rcu_read_lock()ed by nf_hook_slow */
+	if (in && br_port_exists(in) &&
+	    FWINV2(ebt_dev_check(e->logical_in, br_port_get_rcu(in)->br->dev),
+		   EBT_ILOGICALIN))
 		return 1;
-	if ((!out || !out->br_port) ? 0 : FWINV2(ebt_dev_check(
-	   e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT))
+	if (out && br_port_exists(out) &&
+	    FWINV2(ebt_dev_check(e->logical_out, br_port_get_rcu(out)->br->dev),
+		   EBT_ILOGICALOUT))
 		return 1;
 
 	if (e->bitmask & EBT_SOURCEMAC) {
diff --git a/net/core/dev.c b/net/core/dev.c
index abdb19e547a7..5902426ef585 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2765,7 +2765,8 @@ int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
 	if (master->priv_flags & IFF_MASTER_ARPMON)
 		dev->last_rx = jiffies;
 
-	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+	if ((master->priv_flags & IFF_MASTER_ALB) &&
+	    (master->priv_flags & IFF_BRIDGE_PORT)) {
 		/* Do address unmangle. The local destination address
 		 * will be always the one master has. Provides the right
 		 * functionality in a bridge.
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index fc9a211e629e..e0504e90a0f0 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -403,8 +403,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -430,8 +431,9 @@ __build_packet_message(struct nfulnl_instance *inst,
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
 			NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is a bridge group, we need to look
 			 * for physical device (when called from ipv4) */
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 12e1ab37fcd8..cc3ae861e8f3 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -296,8 +296,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSINDEV,
 				     htonl(indev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by __nf_queue */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV,
-				     htonl(indev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(indev)->br->dev->ifindex));
 		} else {
 			/* Case 2: indev is bridge group, we need to look for
 			 * physical device (when called from ipv4) */
@@ -321,8 +322,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_PHYSOUTDEV,
 				     htonl(outdev->ifindex));
 			/* this is the bridge group "brX" */
+			/* rcu_read_lock()ed by __nf_queue */
 			NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV,
-				     htonl(outdev->br_port->br->dev->ifindex));
+				     htonl(br_port_get_rcu(outdev)->br->dev->ifindex));
 		} else {
 			/* Case 2: outdev is bridge group, we need to look for
 			 * physical output device (when called from ipv4) */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 90ab3c8519be..3a7b8a2f2d5a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1107,7 +1107,7 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
 			       enum nl80211_iftype iftype)
 {
 	if (!use_4addr) {
-		if (netdev && netdev->br_port)
+		if (netdev && (netdev->priv_flags & IFF_BRIDGE_PORT))
 			return -EBUSY;
 		return 0;
 	}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3416373a9c0c..0c8a1e8b7690 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -770,8 +770,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 		return -EOPNOTSUPP;
 
 	/* if it's part of a bridge, reject changing type to station/ibss */
-	if (dev->br_port && (ntype == NL80211_IFTYPE_ADHOC ||
-			     ntype == NL80211_IFTYPE_STATION))
+	if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
+	    (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION))
 		return -EBUSY;
 
 	if (ntype != otype) {
-- 
cgit v1.2.3-58-ga151


From a3433f35a55f7604742cae620c6dc6edfc70db6a Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 12 Jun 2010 14:01:43 +0000
Subject: tcp: unify tcp flag macros

unify tcp flag macros: TCPHDR_FIN, TCPHDR_SYN, TCPHDR_RST, TCPHDR_PSH,
TCPHDR_ACK, TCPHDR_URG, TCPHDR_ECE and TCPHDR_CWR. TCBCB_FLAG_* are replaced
with the corresponding TCPHDR_*.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/tcp.h                      |   24 ++++++-------
 net/ipv4/tcp.c                         |    8 ++--
 net/ipv4/tcp_input.c                   |    2 -
 net/ipv4/tcp_output.c                  |   59 ++++++++++++++++-----------------
 net/netfilter/nf_conntrack_proto_tcp.c |   32 ++++++-----------
 net/netfilter/xt_TCPMSS.c              |    4 --
 6 files changed, 58 insertions(+), 71 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h                      | 24 +++++++-------
 net/ipv4/tcp.c                         |  8 ++---
 net/ipv4/tcp_input.c                   |  2 +-
 net/ipv4/tcp_output.c                  | 59 +++++++++++++++++-----------------
 net/netfilter/nf_conntrack_proto_tcp.c | 32 +++++++-----------
 net/netfilter/xt_TCPMSS.c              |  4 +--
 6 files changed, 58 insertions(+), 71 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 573166484413..9e68e25c8b82 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -602,6 +602,17 @@ extern u32	__tcp_select_window(struct sock *sk);
  */
 #define tcp_time_stamp		((__u32)(jiffies))
 
+#define tcp_flag_byte(th) (((u_int8_t *)th)[13])
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+
 /* This is what the send packet queuing engine uses to pass
  * TCP per-packet control information to the transmission
  * code.  We also store the host-order sequence numbers in
@@ -620,19 +631,6 @@ struct tcp_skb_cb {
 	__u32		end_seq;	/* SEQ + FIN + SYN + datalen	*/
 	__u32		when;		/* used to compute rtt's	*/
 	__u8		flags;		/* TCP header flags.		*/
-
-	/* NOTE: These must match up to the flags byte in a
-	 *       real TCP header.
-	 */
-#define TCPCB_FLAG_FIN		0x01
-#define TCPCB_FLAG_SYN		0x02
-#define TCPCB_FLAG_RST		0x04
-#define TCPCB_FLAG_PSH		0x08
-#define TCPCB_FLAG_ACK		0x10
-#define TCPCB_FLAG_URG		0x20
-#define TCPCB_FLAG_ECE		0x40
-#define TCPCB_FLAG_CWR		0x80
-
 	__u8		sacked;		/* State flags for SACK/FACK.	*/
 #define TCPCB_SACKED_ACKED	0x01	/* SKB ACK'd by a SACK block	*/
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 49d0d2b8900c..779d40c3b96e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -511,7 +511,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+	TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 	tp->pushed_seq = tp->write_seq;
 }
 
@@ -527,7 +527,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
 
 	skb->csum    = 0;
 	tcb->seq     = tcb->end_seq = tp->write_seq;
-	tcb->flags   = TCPCB_FLAG_ACK;
+	tcb->flags   = TCPHDR_ACK;
 	tcb->sacked  = 0;
 	skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
@@ -815,7 +815,7 @@ new_segment:
 		skb_shinfo(skb)->gso_segs = 0;
 
 		if (!copied)
-			TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
 
 		copied += copy;
 		poffset += copy;
@@ -1061,7 +1061,7 @@ new_segment:
 			}
 
 			if (!copied)
-				TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
+				TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
 
 			tp->write_seq += copy;
 			TCP_SKB_CB(skb)->end_seq += copy;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 548d575e6cc6..04334661fa28 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3286,7 +3286,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 		 * connection startup slow start one packet too
 		 * quickly.  This is severely frowned upon behavior.
 		 */
-		if (!(scb->flags & TCPCB_FLAG_SYN)) {
+		if (!(scb->flags & TCPHDR_SYN)) {
 			flag |= FLAG_DATA_ACKED;
 		} else {
 			flag |= FLAG_SYN_ACKED;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957f201a..51d316dbb058 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -294,9 +294,9 @@ static u16 tcp_select_window(struct sock *sk)
 /* Packet ECN state for a SYN-ACK */
 static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
+	TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
 	if (!(tp->ecn_flags & TCP_ECN_OK))
-		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
+		TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
 }
 
 /* Packet ECN state for a SYN.  */
@@ -306,7 +306,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn == 1) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
 	}
 }
@@ -361,7 +361,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 	skb_shinfo(skb)->gso_type = 0;
 
 	TCP_SKB_CB(skb)->seq = seq;
-	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+	if (flags & (TCPHDR_SYN | TCPHDR_FIN))
 		seq++;
 	TCP_SKB_CB(skb)->end_seq = seq;
 }
@@ -820,7 +820,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	tcb = TCP_SKB_CB(skb);
 	memset(&opts, 0, sizeof(opts));
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN))
+	if (unlikely(tcb->flags & TCPHDR_SYN))
 		tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
 	else
 		tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -843,7 +843,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	*(((__be16 *)th) + 6)	= htons(((tcp_header_size >> 2) << 12) |
 					tcb->flags);
 
-	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+	if (unlikely(tcb->flags & TCPHDR_SYN)) {
 		/* RFC1323: The window in SYN & SYN/ACK segments
 		 * is never scaled.
 		 */
@@ -866,7 +866,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
-	if (likely((tcb->flags & TCPCB_FLAG_SYN) == 0))
+	if (likely((tcb->flags & TCPHDR_SYN) == 0))
 		TCP_ECN_send(sk, skb, tcp_header_size);
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -880,7 +880,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	icsk->icsk_af_ops->send_check(sk, skb);
 
-	if (likely(tcb->flags & TCPCB_FLAG_ACK))
+	if (likely(tcb->flags & TCPHDR_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
 	if (skb->len != tcp_header_size)
@@ -1023,7 +1023,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
 
@@ -1328,8 +1328,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
 	u32 in_flight, cwnd;
 
 	/* Don't be strict about the congestion window for the final FIN.  */
-	if ((TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
-	    tcp_skb_pcount(skb) == 1)
+	if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
 		return 1;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1398,7 +1397,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 	 * Nagle can be ignored during F-RTO too (see RFC4138).
 	 */
 	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+	    (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
 		return 1;
 
 	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
@@ -1487,7 +1486,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 
 	/* This packet was never sent out yet, so no SACK bits. */
@@ -1518,7 +1517,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
 
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
 		goto send_now;
 
 	if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1644,7 +1643,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
-	TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK;
+	TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
 	TCP_SKB_CB(nskb)->sacked = 0;
 	nskb->csum = 0;
 	nskb->ip_summed = skb->ip_summed;
@@ -1669,7 +1668,7 @@ static int tcp_mtu_probe(struct sock *sk)
 			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
-						   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+						   ~(TCPHDR_FIN|TCPHDR_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
 				if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -2020,7 +2019,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
 
 	if (!sysctl_tcp_retrans_collapse)
 		return;
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
+	if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
 		return;
 
 	tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2112,7 +2111,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * since it is cheap to do so and saves bytes on the network.
 	 */
 	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
+	    (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
 			/* Reuse, even though it does some unnecessary work */
@@ -2301,7 +2300,7 @@ void tcp_send_fin(struct sock *sk)
 	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
 		TCP_SKB_CB(skb)->end_seq++;
 		tp->write_seq++;
 	} else {
@@ -2318,7 +2317,7 @@ void tcp_send_fin(struct sock *sk)
 		skb_reserve(skb, MAX_TCP_HEADER);
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
 		tcp_init_nondata_skb(skb, tp->write_seq,
-				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
+				     TCPHDR_ACK | TCPHDR_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2343,7 +2342,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
 	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
-			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
+			     TCPHDR_ACK | TCPHDR_RST);
 	/* Send it off. */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
@@ -2363,11 +2362,11 @@ int tcp_send_synack(struct sock *sk)
 	struct sk_buff *skb;
 
 	skb = tcp_write_queue_head(sk);
-	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
+	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
 	}
-	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
+	if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
 		if (skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
@@ -2381,7 +2380,7 @@ int tcp_send_synack(struct sock *sk)
 			skb = nskb;
 		}
 
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -2460,7 +2459,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	 * not even correctly set)
 	 */
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
-			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
+			     TCPHDR_SYN | TCPHDR_ACK);
 
 	if (OPTION_COOKIE_EXTENSION & opts.options) {
 		if (s_data_desired) {
@@ -2592,7 +2591,7 @@ int tcp_connect(struct sock *sk)
 	skb_reserve(buff, MAX_TCP_HEADER);
 
 	tp->snd_nxt = tp->write_seq;
-	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
 	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
@@ -2698,7 +2697,7 @@ void tcp_send_ack(struct sock *sk)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(buff, MAX_TCP_HEADER);
-	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
+	tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
 
 	/* Send it off, this clears delayed acks for us. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
@@ -2732,7 +2731,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 * end to send an ack.  Don't queue or clone SKB, just
 	 * send it.
 	 */
-	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
+	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
@@ -2762,13 +2761,13 @@ int tcp_write_wakeup(struct sock *sk)
 		if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
 		    skb->len > mss) {
 			seg_size = min(seg_size, mss);
-			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+			TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 			if (tcp_fragment(sk, skb, seg_size, mss))
 				return -1;
 		} else if (!tcp_skb_pcount(skb))
 			tcp_set_skb_tso_segs(sk, skb, mss);
 
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+		TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
 		if (!err)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 9dd8cd4fb6e6..802dbffae8b4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -736,27 +736,19 @@ static bool tcp_in_window(const struct nf_conn *ct,
 	return res;
 }
 
-#define	TH_FIN	0x01
-#define	TH_SYN	0x02
-#define	TH_RST	0x04
-#define	TH_PUSH	0x08
-#define	TH_ACK	0x10
-#define	TH_URG	0x20
-#define	TH_ECE	0x40
-#define	TH_CWR	0x80
-
 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] =
+static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
+				 TCPHDR_URG) + 1] =
 {
-	[TH_SYN]			= 1,
-	[TH_SYN|TH_URG]			= 1,
-	[TH_SYN|TH_ACK]			= 1,
-	[TH_RST]			= 1,
-	[TH_RST|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK]			= 1,
-	[TH_FIN|TH_ACK|TH_URG]		= 1,
-	[TH_ACK]			= 1,
-	[TH_ACK|TH_URG]			= 1,
+	[TCPHDR_SYN]				= 1,
+	[TCPHDR_SYN|TCPHDR_URG]			= 1,
+	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
+	[TCPHDR_RST]				= 1,
+	[TCPHDR_RST|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
+	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
+	[TCPHDR_ACK]				= 1,
+	[TCPHDR_ACK|TCPHDR_URG]			= 1,
 };
 
 /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c.  */
@@ -803,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
 	}
 
 	/* Check TCP flags. */
-	tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH));
+	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
 	if (!tcp_valid_flags[tcpflags]) {
 		if (LOG_INVALID(net, IPPROTO_TCP))
 			nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 1841388c770a..eb81c380da1b 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -220,15 +220,13 @@ tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 }
 #endif
 
-#define TH_SYN 0x02
-
 /* Must specify -p tcp --syn */
 static inline bool find_syn_match(const struct xt_entry_match *m)
 {
 	const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
 
 	if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
-	    tcpinfo->flg_cmp & TH_SYN &&
+	    tcpinfo->flg_cmp & TCPHDR_SYN &&
 	    !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
 		return true;
 
-- 
cgit v1.2.3-58-ga151


From aa1039e73cc2cf834e99c09d2033d5d2675357b9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 08:23:14 +0000
Subject: inetpeer: RCU conversion

inetpeer currently uses an AVL tree protected by an rwlock.

It's possible to make most lookups use RCU

1) Add a struct rcu_head to struct inet_peer

2) add a lookup_rcu_bh() helper to perform lockless and opportunistic
lookup. This is a normal function, not a macro like lookup().

3) Add a limit to number of links followed by lookup_rcu_bh(). This is
needed in case we fall in a loop.

4) add an smp_wmb() in link_to_pool() right before node insert.

5) make unlink_from_pool() use atomic_cmpxchg() to make sure it can take
last reference to an inet_peer, since lockless readers could increase
refcount, even while we hold peers.lock.

6) Delay struct inet_peer freeing after rcu grace period so that
lookup_rcu_bh() cannot crash.

7) inet_getpeer() first attempts lockless lookup.
   Note this lookup can fail even if target is in AVL tree, but a
concurrent writer can let tree in a non correct form.
   If this attemps fails, lock is taken a regular lookup is performed
again.

8) convert peers.lock from rwlock to a spinlock

9) Remove SLAB_HWCACHE_ALIGN when peer_cachep is created, because
rcu_head adds 16 bytes on 64bit arches, doubling effective size (64 ->
128 bytes)
In a future patch, this is probably possible to revert this part, if rcu
field is put in an union to share space with rid, ip_id_count, tcp_ts &
tcp_ts_stamp. These fields being manipulated only with refcnt > 0.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h |   1 +
 net/ipv4/inetpeer.c    | 164 ++++++++++++++++++++++++++++---------------------
 2 files changed, 96 insertions(+), 69 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 87b1df0d4d8c..617404730422 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -26,6 +26,7 @@ struct inet_peer {
 	atomic_t		ip_id_count;	/* IP ID for the next packet */
 	__u32			tcp_ts;
 	__u32			tcp_ts_stamp;
+	struct rcu_head		rcu;
 };
 
 void			inet_initpeers(void) __init;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 035673fd42d4..58fbc7e2475e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -51,8 +51,8 @@
  *  lookups performed with disabled BHs.
  *
  *  Serialisation issues.
- *  1.  Nodes may appear in the tree only with the pool write lock held.
- *  2.  Nodes may disappear from the tree only with the pool write lock held
+ *  1.  Nodes may appear in the tree only with the pool lock held.
+ *  2.  Nodes may disappear from the tree only with the pool lock held
  *      AND reference count being 0.
  *  3.  Nodes appears and disappears from unused node list only under
  *      "inet_peer_unused_lock".
@@ -80,11 +80,11 @@ static const struct inet_peer peer_fake_node = {
 
 static struct {
 	struct inet_peer *root;
-	rwlock_t	lock;
+	spinlock_t	lock;
 	int		total;
 } peers = {
 	.root		= peer_avl_empty,
-	.lock		= __RW_LOCK_UNLOCKED(peers.lock),
+	.lock		= __SPIN_LOCK_UNLOCKED(peers.lock),
 	.total		= 0,
 };
 #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+			0, SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
@@ -153,16 +153,13 @@ static void unlink_from_unused(struct inet_peer *p)
 
 /*
  * Called with local BH disabled and the pool lock held.
- * _stack is known to be NULL or not at compile time,
- * so compiler will optimize the if (_stack) tests.
  */
 #define lookup(_daddr, _stack) 					\
 ({								\
 	struct inet_peer *u, **v;				\
-	if (_stack != NULL) {					\
-		stackptr = _stack;				\
-		*stackptr++ = &peers.root;			\
-	}							\
+								\
+	stackptr = _stack;					\
+	*stackptr++ = &peers.root;				\
 	for (u = peers.root; u != peer_avl_empty; ) {		\
 		if (_daddr == u->v4daddr)			\
 			break;					\
@@ -170,14 +167,41 @@ static void unlink_from_unused(struct inet_peer *p)
 			v = &u->avl_left;			\
 		else						\
 			v = &u->avl_right;			\
-		if (_stack != NULL)				\
-			*stackptr++ = v;			\
+		*stackptr++ = v;				\
 		u = *v;						\
 	}							\
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held. */
+/*
+ * Called with rcu_read_lock_bh()
+ * Because we hold no lock against a writer, its quite possible we fall
+ * in an endless loop.
+ * But every pointer we follow is guaranteed to be valid thanks to RCU.
+ * We exit from this function if number of links exceeds PEER_MAXDEPTH
+ */
+static struct inet_peer *lookup_rcu_bh(__be32 daddr)
+{
+	struct inet_peer *u = rcu_dereference_bh(peers.root);
+	int count = 0;
+
+	while (u != peer_avl_empty) {
+		if (daddr == u->v4daddr) {
+			if (unlikely(!atomic_inc_not_zero(&u->refcnt)))
+				u = NULL;
+			return u;
+		}
+		if ((__force __u32)daddr < (__force __u32)u->v4daddr)
+			u = rcu_dereference_bh(u->avl_left);
+		else
+			u = rcu_dereference_bh(u->avl_right);
+		if (unlikely(++count == PEER_MAXDEPTH))
+			break;
+	}
+	return NULL;
+}
+
+/* Called with local BH disabled and the pool lock held. */
 #define lookup_rightempty(start)				\
 ({								\
 	struct inet_peer *u, **v;				\
@@ -191,9 +215,10 @@ static void unlink_from_unused(struct inet_peer *p)
 	u;							\
 })
 
-/* Called with local BH disabled and the pool write lock held.
+/* Called with local BH disabled and the pool lock held.
  * Variable names are the proof of operation correctness.
- * Look into mm/map_avl.c for more detail description of the ideas.  */
+ * Look into mm/map_avl.c for more detail description of the ideas.
+ */
 static void peer_avl_rebalance(struct inet_peer **stack[],
 		struct inet_peer ***stackend)
 {
@@ -269,16 +294,22 @@ static void peer_avl_rebalance(struct inet_peer **stack[],
 	}
 }
 
-/* Called with local BH disabled and the pool write lock held. */
+/* Called with local BH disabled and the pool lock held. */
 #define link_to_pool(n)						\
 do {								\
 	n->avl_height = 1;					\
 	n->avl_left = peer_avl_empty;				\
 	n->avl_right = peer_avl_empty;				\
+	smp_wmb(); /* lockless readers can catch us now */	\
 	**--stackptr = n;					\
 	peer_avl_rebalance(stack, stackptr);			\
 } while (0)
 
+static void inetpeer_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu));
+}
+
 /* May be called with local BH enabled. */
 static void unlink_from_pool(struct inet_peer *p)
 {
@@ -286,13 +317,13 @@ static void unlink_from_pool(struct inet_peer *p)
 
 	do_free = 0;
 
-	write_lock_bh(&peers.lock);
+	spin_lock_bh(&peers.lock);
 	/* Check the reference counter.  It was artificially incremented by 1
-	 * in cleanup() function to prevent sudden disappearing.  If the
-	 * reference count is still 1 then the node is referenced only as `p'
-	 * here and from the pool.  So under the exclusive pool lock it's safe
-	 * to remove the node and free it later. */
-	if (atomic_read(&p->refcnt) == 1) {
+	 * in cleanup() function to prevent sudden disappearing.  If we can
+	 * atomically (because of lockless readers) take this last reference,
+	 * it's safe to remove the node and free it later.
+	 */
+	if (atomic_cmpxchg(&p->refcnt, 1, 0) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
 		if (lookup(p->v4daddr, stack) != p)
@@ -321,17 +352,18 @@ static void unlink_from_pool(struct inet_peer *p)
 		peers.total--;
 		do_free = 1;
 	}
-	write_unlock_bh(&peers.lock);
+	spin_unlock_bh(&peers.lock);
 
 	if (do_free)
-		kmem_cache_free(peer_cachep, p);
+		call_rcu_bh(&p->rcu, inetpeer_free_rcu);
 	else
 		/* The node is used again.  Decrease the reference counter
 		 * back.  The loop "cleanup -> unlink_from_unused
 		 *   -> unlink_from_pool -> putpeer -> link_to_unused
 		 *   -> cleanup (for the same node)"
 		 * doesn't really exist because the entry will have a
-		 * recent deletion time and will not be cleaned again soon. */
+		 * recent deletion time and will not be cleaned again soon.
+		 */
 		inet_putpeer(p);
 }
 
@@ -375,62 +407,56 @@ static int cleanup_once(unsigned long ttl)
 /* Called with or without local BH being disabled. */
 struct inet_peer *inet_getpeer(__be32 daddr, int create)
 {
-	struct inet_peer *p, *n;
+	struct inet_peer *p;
 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
 
-	/* Look up for the address quickly. */
-	read_lock_bh(&peers.lock);
-	p = lookup(daddr, NULL);
-	if (p != peer_avl_empty)
-		atomic_inc(&p->refcnt);
-	read_unlock_bh(&peers.lock);
+	/* Look up for the address quickly, lockless.
+	 * Because of a concurrent writer, we might not find an existing entry.
+	 */
+	rcu_read_lock_bh();
+	p = lookup_rcu_bh(daddr);
+	rcu_read_unlock_bh();
+
+	if (p) {
+		/* The existing node has been found.
+		 * Remove the entry from unused list if it was there.
+		 */
+		unlink_from_unused(p);
+		return p;
+	}
 
+	/* retry an exact lookup, taking the lock before.
+	 * At least, nodes should be hot in our cache.
+	 */
+	spin_lock_bh(&peers.lock);
+	p = lookup(daddr, stack);
 	if (p != peer_avl_empty) {
-		/* The existing node has been found. */
+		atomic_inc(&p->refcnt);
+		spin_unlock_bh(&peers.lock);
 		/* Remove the entry from unused list if it was there. */
 		unlink_from_unused(p);
 		return p;
 	}
-
-	if (!create)
-		return NULL;
-
-	/* Allocate the space outside the locked region. */
-	n = kmem_cache_alloc(peer_cachep, GFP_ATOMIC);
-	if (n == NULL)
-		return NULL;
-	n->v4daddr = daddr;
-	atomic_set(&n->refcnt, 1);
-	atomic_set(&n->rid, 0);
-	atomic_set(&n->ip_id_count, secure_ip_id(daddr));
-	n->tcp_ts_stamp = 0;
-
-	write_lock_bh(&peers.lock);
-	/* Check if an entry has suddenly appeared. */
-	p = lookup(daddr, stack);
-	if (p != peer_avl_empty)
-		goto out_free;
-
-	/* Link the node. */
-	link_to_pool(n);
-	INIT_LIST_HEAD(&n->unused);
-	peers.total++;
-	write_unlock_bh(&peers.lock);
+	p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL;
+	if (p) {
+		p->v4daddr = daddr;
+		atomic_set(&p->refcnt, 1);
+		atomic_set(&p->rid, 0);
+		atomic_set(&p->ip_id_count, secure_ip_id(daddr));
+		p->tcp_ts_stamp = 0;
+		INIT_LIST_HEAD(&p->unused);
+
+
+		/* Link the node. */
+		link_to_pool(p);
+		peers.total++;
+	}
+	spin_unlock_bh(&peers.lock);
 
 	if (peers.total >= inet_peer_threshold)
 		/* Remove one less-recently-used entry. */
 		cleanup_once(0);
 
-	return n;
-
-out_free:
-	/* The appropriate node is already in the pool. */
-	atomic_inc(&p->refcnt);
-	write_unlock_bh(&peers.lock);
-	/* Remove the entry from unused list if it was there. */
-	unlink_from_unused(p);
-	/* Free preallocated the preallocated node. */
-	kmem_cache_free(peer_cachep, n);
 	return p;
 }
 
-- 
cgit v1.2.3-58-ga151


From 2bbdf389a9432270147c8e8e35b021ad61772f65 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 13 Jun 2010 11:29:39 +0000
Subject: ipv6: syncookies: do not skip ->iif initialization

When syncookies are in effect, req->iif is left uninitialized.
In case of e.g. link-local addresses the route lookup then fails
and no syn-ack is sent.

Rearrange things so ->iif is also initialized in the syncookie case.

want_cookie can only be true when the isn was zero, thus move the want_cookie
check into the "!isn" branch.

Cc: Glenn Griffin <ggriffin.kernel@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5887141ad641..f87534569366 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1272,10 +1272,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
-	if (want_cookie) {
-		isn = cookie_v6_init_sequence(sk, skb, &req->mss);
-		req->cookie_ts = tmp_opt.tstamp_ok;
-	} else if (!isn) {
+	if (!isn) {
 		if (ipv6_opt_accepted(sk, skb) ||
 		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -1288,8 +1285,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (!sk->sk_bound_dev_if &&
 		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
 			treq->iif = inet6_iif(skb);
-
-		isn = tcp_v6_init_sequence(skb);
+		if (!want_cookie) {
+			isn = tcp_v6_init_sequence(skb);
+		} else {
+			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
+			req->cookie_ts = tmp_opt.tstamp_ok;
+		}
 	}
 	tcp_rsk(req)->snt_isn = isn;
 
-- 
cgit v1.2.3-58-ga151


From d27f9b35827ec91d71d3561c127a0a8135fb470d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 13 Jun 2010 23:02:24 +0000
Subject: ip_frag: Remove some atomic ops

Instead of doing one atomic operation per frag, we can factorize them.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 3 +--
 net/ipv6/reassembly.c  | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 75347ea70ea0..963c3689d0af 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -556,7 +556,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &qp->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -566,8 +565,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &qp->q.net->mem);
 	}
+	atomic_sub(head->truesize, &qp->q.net->mem);
 
 	head->next = NULL;
 	head->dev = dev;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 6d4292ff5854..a630506151c3 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -524,7 +524,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	skb_shinfo(head)->frag_list = head->next;
 	skb_reset_transport_header(head);
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &fq->q.net->mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -534,8 +533,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &fq->q.net->mem);
 	}
+	atomic_sub(head->truesize, &fq->q.net->mem);
 
 	head->next = NULL;
 	head->dev = dev;
-- 
cgit v1.2.3-58-ga151


From a95d8c88bea0c93505e1d143d075f112be2b25e3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 13 Jun 2010 23:22:43 +0000
Subject: ipfrag : frag_kfree_skb() cleanup

Third param (work) is unused, remove it.

Remove __inline__ and inline qualifiers.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_fragment.c | 9 +++------
 net/ipv6/reassembly.c  | 7 ++-----
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 963c3689d0af..858d34648eee 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -124,11 +124,8 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
 }
 
 /* Memory Tracking Functions. */
-static __inline__ void frag_kfree_skb(struct netns_frags *nf,
-		struct sk_buff *skb, int *work)
+static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
 	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
@@ -309,7 +306,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
-		frag_kfree_skb(qp->q.net, fp, NULL);
+		frag_kfree_skb(qp->q.net, fp);
 		fp = xp;
 	} while (fp);
 
@@ -446,7 +443,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 				qp->q.fragments = next;
 
 			qp->q.meat -= free_it->len;
-			frag_kfree_skb(qp->q.net, free_it, NULL);
+			frag_kfree_skb(qp->q.net, free_it);
 		}
 	}
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a630506151c3..0b97230a3251 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -150,11 +150,8 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a)
 EXPORT_SYMBOL(ip6_frag_match);
 
 /* Memory Tracking Functions. */
-static inline void frag_kfree_skb(struct netns_frags *nf,
-		struct sk_buff *skb, int *work)
+static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 {
-	if (work)
-		*work -= skb->truesize;
 	atomic_sub(skb->truesize, &nf->mem);
 	kfree_skb(skb);
 }
@@ -392,7 +389,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 				fq->q.fragments = next;
 
 			fq->q.meat -= free_it->len;
-			frag_kfree_skb(fq->q.net, free_it, NULL);
+			frag_kfree_skb(fq->q.net, free_it);
 		}
 	}
 
-- 
cgit v1.2.3-58-ga151


From fed396a585d8e1870b326f2e8e1888a72957abb8 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 15 Jun 2010 21:43:07 -0700
Subject: bridge: Fix OOM crash in deliver_clone

The bridge multicast patches introduced an OOM crash in the forward
path, when deliver_clone fails to clone the skb.

Reported-by: Mark Wagner <mwagner@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a98ef1393097..a4e72a89e4ff 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -140,10 +140,10 @@ static int deliver_clone(const struct net_bridge_port *prev,
 			 void (*__packet_hook)(const struct net_bridge_port *p,
 					       struct sk_buff *skb))
 {
+	struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
+
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (!skb) {
-		struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
-
 		dev->stats.tx_dropped++;
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3-58-ga151


From 9f70b0fcecd134890affe627aa2823591313a413 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 15 Jun 2010 21:43:48 -0700
Subject: bridge: Add const to dummy br_netpoll_send_skb

The version of br_netpoll_send_skb used when netpoll is off is
missing a const thus causing a warning.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index f6bc979b1135..07cf57b373fe 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -305,7 +305,7 @@ static inline struct netpoll_info *br_netpoll_info(struct net_bridge *br)
 	return NULL;
 }
 
-static inline void br_netpoll_send_skb(struct net_bridge_port *p,
+static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 				       struct sk_buff *skb)
 {
 }
-- 
cgit v1.2.3-58-ga151


From 5f2f89209500623ccb4713ec4af7de86fd30a9e4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 15 Jun 2010 21:47:39 -0700
Subject: inetpeer: do not use zero refcnt for freed entries

Followup of commit aa1039e73cc2 (inetpeer: RCU conversion)

Unused inet_peer entries have a null refcnt.

Using atomic_inc_not_zero() in rcu lookups is not going to work for
them, and slow path is taken.

Fix this using -1 marker instead of 0 for deleted entries.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inetpeer.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 58fbc7e2475e..349249fad2db 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -187,7 +187,12 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr)
 
 	while (u != peer_avl_empty) {
 		if (daddr == u->v4daddr) {
-			if (unlikely(!atomic_inc_not_zero(&u->refcnt)))
+			/* Before taking a reference, check if this entry was
+			 * deleted, unlink_from_pool() sets refcnt=-1 to make
+			 * distinction between an unused entry (refcnt=0) and
+			 * a freed one.
+			 */
+			if (unlikely(!atomic_add_unless(&u->refcnt, 1, -1)))
 				u = NULL;
 			return u;
 		}
@@ -322,8 +327,9 @@ static void unlink_from_pool(struct inet_peer *p)
 	 * in cleanup() function to prevent sudden disappearing.  If we can
 	 * atomically (because of lockless readers) take this last reference,
 	 * it's safe to remove the node and free it later.
+	 * We use refcnt=-1 to alert lockless readers this entry is deleted.
 	 */
-	if (atomic_cmpxchg(&p->refcnt, 1, 0) == 1) {
+	if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) {
 		struct inet_peer **stack[PEER_MAXDEPTH];
 		struct inet_peer ***stackptr, ***delp;
 		if (lookup(p->v4daddr, stack) != p)
-- 
cgit v1.2.3-58-ga151


From 317fe0e6c5dc9448bcef41a2e31fecfd3dba7f55 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 16 Jun 2010 04:52:13 +0000
Subject: inetpeer: restore small inet_peer structures

Addition of rcu_head to struct inet_peer added 16bytes on 64bit arches.

Thats a bit unfortunate, since old size was exactly 64 bytes.

This can be solved, using an union between this rcu_head an four fields,
that are normally used only when a refcount is taken on inet_peer.
rcu_head is used only when refcnt=-1, right before structure freeing.

Add a inet_peer_refcheck() function to check this assertion for a while.

We can bring back SLAB_HWCACHE_ALIGN qualifier in kmem cache creation.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inetpeer.h | 31 ++++++++++++++++++++++++++-----
 net/ipv4/inetpeer.c    |  4 ++--
 net/ipv4/route.c       |  1 +
 net/ipv4/tcp_ipv4.c    | 11 +++++++----
 4 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 617404730422..417d0c894f29 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -22,11 +22,21 @@ struct inet_peer {
 	__u32			dtime;		/* the time of last use of not
 						 * referenced entries */
 	atomic_t		refcnt;
-	atomic_t		rid;		/* Frag reception counter */
-	atomic_t		ip_id_count;	/* IP ID for the next packet */
-	__u32			tcp_ts;
-	__u32			tcp_ts_stamp;
-	struct rcu_head		rcu;
+	/*
+	 * Once inet_peer is queued for deletion (refcnt == -1), following fields
+	 * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp
+	 * We can share memory with rcu_head to keep inet_peer small
+	 * (less then 64 bytes)
+	 */
+	union {
+		struct {
+			atomic_t	rid;		/* Frag reception counter */
+			atomic_t	ip_id_count;	/* IP ID for the next packet */
+			__u32		tcp_ts;
+			__u32		tcp_ts_stamp;
+		};
+		struct rcu_head         rcu;
+	};
 };
 
 void			inet_initpeers(void) __init;
@@ -37,10 +47,21 @@ struct inet_peer	*inet_getpeer(__be32 daddr, int create);
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
 
+/*
+ * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * tcp_ts_stamp if no refcount is taken on inet_peer
+ */
+static inline void inet_peer_refcheck(const struct inet_peer *p)
+{
+	WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
+}
+
+
 /* can be called with or without local BH being disabled */
 static inline __u16	inet_getid(struct inet_peer *p, int more)
 {
 	more++;
+	inet_peer_refcheck(p);
 	return atomic_add_return(more, &p->ip_id_count) - more;
 }
 
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 349249fad2db..9ffa24b9a804 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -64,7 +64,7 @@
  *		   usually under some other lock to prevent node disappearing
  *		dtime: unused node list lock
  *		v4daddr: unchangeable
- *		ip_id_count: idlock
+ *		ip_id_count: atomic value (no lock needed)
  */
 
 static struct kmem_cache *peer_cachep __read_mostly;
@@ -129,7 +129,7 @@ void __init inet_initpeers(void)
 
 	peer_cachep = kmem_cache_create("inet_peer_cache",
 			sizeof(struct inet_peer),
-			0, SLAB_PANIC,
+			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
 			NULL);
 
 	/* All the timers, started at system startup tend
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a291edbbc97f..03430de46166 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2881,6 +2881,7 @@ static int rt_fill_info(struct net *net,
 	error = rt->dst.error;
 	expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
 	if (rt->peer) {
+		inet_peer_refcheck(rt->peer);
 		id = atomic_read(&rt->peer->ip_id_count) & 0xffff;
 		if (rt->peer->tcp_ts_stamp) {
 			ts = rt->peer->tcp_ts;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7f9515c0379f..2e41e6f92968 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -204,10 +204,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 		 * TIME-WAIT * and initialize rx_opt.ts_recent from it,
 		 * when trying new connection.
 		 */
-		if (peer != NULL &&
-		    (u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
-			tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
-			tp->rx_opt.ts_recent = peer->tcp_ts;
+		if (peer) {
+			inet_peer_refcheck(peer);
+			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
+				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
+				tp->rx_opt.ts_recent = peer->tcp_ts;
+			}
 		}
 	}
 
@@ -1351,6 +1353,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		    (dst = inet_csk_route_req(sk, req)) != NULL &&
 		    (peer = rt_get_peer((struct rtable *)dst)) != NULL &&
 		    peer->v4daddr == saddr) {
+			inet_peer_refcheck(peer);
 			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
 			    (s32)(peer->tcp_ts - req->ts_recent) >
 							TCP_PAWS_WINDOW) {
-- 
cgit v1.2.3-58-ga151


From 8c76368174ed2359739f1b7b8a9c042b1ef839c4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 16 Jun 2010 14:42:15 -0700
Subject: syncookies: check decoded options against sysctl settings

Discard the ACK if we find options that do not match current sysctl
settings.

Previously it was possible to create a connection with sack, wscale,
etc. enabled even if the feature was disabled via sysctl.

Also remove an unneeded call to tcp_sack_reset() in
cookie_check_timestamp: Both call sites (cookie_v4_check,
cookie_v6_check) zero "struct tcp_options_received", hand it to
tcp_parse_options() (which does not change tcp_opt->num_sacks/dsack)
and then call cookie_check_timestamp().

Even if num_sacks/dsacks were changed, the structure is allocated on
the stack and after cookie_check_timestamp returns only a few selected
members are copied to the inet_request_sock.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/syncookies.c | 25 +++++++++++++++++++------
 net/ipv6/syncookies.c |  4 ++--
 3 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 9e68e25c8b82..18c246c9b009 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern void cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 02bef6aa8b30..51b5662545d6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -230,23 +230,36 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * The lowest 4 bits are for snd_wscale
  * The next 4 lsb are for rcv_wscale
  * The next lsb is for sack_ok
+ *
+ * return false if we decode an option that should not be.
  */
-void cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 {
 	/* echoed timestamp, 9 lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
 
+	if (!tcp_opt->saw_tstamp)  {
+		tcp_clear_options(tcp_opt);
+		return true;
+	}
+
+	if (!sysctl_tcp_timestamps)
+		return false;
+
 	tcp_opt->snd_wscale = options & 0xf;
 	options >>= 4;
 	tcp_opt->rcv_wscale = options & 0xf;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
 
-	if (tcp_opt->sack_ok)
-		tcp_sack_reset(tcp_opt);
+	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
+		return false;
 
-	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale)
+	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) {
 		tcp_opt->wscale_ok = 1;
+		return sysctl_tcp_window_scaling != 0;
+	}
+	return true;
 }
 EXPORT_SYMBOL(cookie_check_timestamp);
 
@@ -281,8 +294,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
+	if (!cookie_check_timestamp(&tcp_opt))
+		goto out;
 
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 70d330f8c990..c7ee57421ece 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -180,8 +180,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (tcp_opt.saw_tstamp)
-		cookie_check_timestamp(&tcp_opt);
+	if (!cookie_check_timestamp(&tcp_opt))
+		goto out;
 
 	ret = NULL;
 	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
-- 
cgit v1.2.3-58-ga151


From fa68a7822780fdc1295f7efb7e4313e62b447e75 Mon Sep 17 00:00:00 2001
From: Tom Hughes <tom@compton.nu>
Date: Tue, 15 Jun 2010 22:24:28 +0000
Subject: Clear IFF_XMIT_DST_RELEASE for teql interfaces

https://bugzilla.kernel.org/show_bug.cgi?id=16183

The sch_teql module, which can be used to load balance over a set of
underlying interfaces, stopped working after 2.6.30 and has been
broken in all kernels since then for any underlying interface which
requires the addition of link level headers.

The problem is that the transmit routine relies on being able to
access the destination address in the skb in order to do address
resolution once it has decided which underlying interface it is going
to transmit through.

In 2.6.31 the IFF_XMIT_DST_RELEASE flag was introduced, and set by
default for all interfaces, which causes the destination address to be
released before the transmit routine for the interface is called.

The solution is to clear that flag for teql interfaces.

Signed-off-by: Tom Hughes <tom@compton.nu>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_teql.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 3415b6ce1c0a..807643bdcbac 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -449,6 +449,7 @@ static __init void teql_master_setup(struct net_device *dev)
 	dev->tx_queue_len	= 100;
 	dev->flags		= IFF_NOARP;
 	dev->hard_header_len	= LL_MAX_HEADER;
+	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
 }
 
 static LIST_HEAD(master_dev_list);
-- 
cgit v1.2.3-58-ga151


From 3f551f9436c05a3b5eccdd6e94733df5bb98d2a5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:28:59 +0000
Subject: sock: Introduce cred_to_ucred

To keep the coming code clear and to allow both the sock
code and the scm code to share the logic introduce a
fuction to translate from struct cred to struct ucred.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h |  5 +++++
 net/core/sock.c        | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'net')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 032a19eb61b1..a2fada9becb6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -24,6 +24,9 @@ struct __kernel_sockaddr_storage {
 #include <linux/types.h>		/* pid_t			*/
 #include <linux/compiler.h>		/* __user			*/
 
+struct pid;
+struct cred;
+
 #define __sockaddr_check_size(size)	\
 	BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
 
@@ -309,6 +312,8 @@ struct ucred {
 #define IPX_TYPE	1
 
 #ifdef __KERNEL__
+extern void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred);
+
 extern int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 			       int offset, int len);
diff --git a/net/core/sock.c b/net/core/sock.c
index f9ce0db41cd6..db8335ad7559 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -110,6 +110,7 @@
 #include <linux/tcp.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
+#include <linux/user_namespace.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -749,6 +750,19 @@ set_rcvbuf:
 EXPORT_SYMBOL(sock_setsockopt);
 
 
+void cred_to_ucred(struct pid *pid, const struct cred *cred,
+		   struct ucred *ucred)
+{
+	ucred->pid = pid_vnr(pid);
+	ucred->uid = ucred->gid = -1;
+	if (cred) {
+		struct user_namespace *current_ns = current_user_ns();
+
+		ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid);
+		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
+	}
+}
+
 int sock_getsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, int __user *optlen)
 {
-- 
cgit v1.2.3-58-ga151


From 109f6e39fa07c48f580125f531f46cb7c245b528 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:30:14 +0000
Subject: af_unix: Allow SO_PEERCRED to work across namespaces.

Use struct pid and struct cred to store the peer credentials on struct
sock.  This gives enough information to convert the peer credential
information to a value relative to whatever namespace the socket is in
at the time.

This removes nasty surprises when using SO_PEERCRED on socket
connetions where the processes on either side are in different pid and
user namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h |  3 ++-
 net/core/sock.c    | 18 ++++++++++++------
 net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++---------
 3 files changed, 42 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index f8acf38f092f..4f26f2f83be9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -295,7 +295,8 @@ struct sock {
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
-	struct ucred		sk_peercred;
+	struct pid		*sk_peer_pid;
+	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
 	long			sk_sndtimeo;
 	struct sk_filter      	*sk_filter;
diff --git a/net/core/sock.c b/net/core/sock.c
index db8335ad7559..0229d5566a46 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -915,11 +915,15 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_PEERCRED:
-		if (len > sizeof(sk->sk_peercred))
-			len = sizeof(sk->sk_peercred);
-		if (copy_to_user(optval, &sk->sk_peercred, len))
+	{
+		struct ucred peercred;
+		if (len > sizeof(peercred))
+			len = sizeof(peercred);
+		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+		if (copy_to_user(optval, &peercred, len))
 			return -EFAULT;
 		goto lenout;
+	}
 
 	case SO_PEERNAME:
 	{
@@ -1133,6 +1137,9 @@ static void __sk_free(struct sock *sk)
 		printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
 		       __func__, atomic_read(&sk->sk_omem_alloc));
 
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	put_pid(sk->sk_peer_pid);
 	put_net(sock_net(sk));
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
@@ -1968,9 +1975,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_sndmsg_page	=	NULL;
 	sk->sk_sndmsg_off	=	0;
 
-	sk->sk_peercred.pid 	=	0;
-	sk->sk_peercred.uid	=	-1;
-	sk->sk_peercred.gid	=	-1;
+	sk->sk_peer_pid 	=	NULL;
+	sk->sk_peer_cred	=	NULL;
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
 	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fef2cc5e9d2b..e1f1349fae86 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -450,11 +450,31 @@ static int unix_release_sock(struct sock *sk, int embrion)
 	return 0;
 }
 
+static void init_peercred(struct sock *sk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(task_tgid(current));
+	sk->sk_peer_cred = get_current_cred();
+}
+
+static void copy_peercred(struct sock *sk, struct sock *peersk)
+{
+	put_pid(sk->sk_peer_pid);
+	if (sk->sk_peer_cred)
+		put_cred(sk->sk_peer_cred);
+	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
+	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+}
+
 static int unix_listen(struct socket *sock, int backlog)
 {
 	int err;
 	struct sock *sk = sock->sk;
 	struct unix_sock *u = unix_sk(sk);
+	struct pid *old_pid = NULL;
+	const struct cred *old_cred = NULL;
 
 	err = -EOPNOTSUPP;
 	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -470,12 +490,14 @@ static int unix_listen(struct socket *sock, int backlog)
 	sk->sk_max_ack_backlog	= backlog;
 	sk->sk_state		= TCP_LISTEN;
 	/* set credentials so connect can copy them */
-	sk->sk_peercred.pid	= task_tgid_vnr(current);
-	current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid);
+	init_peercred(sk);
 	err = 0;
 
 out_unlock:
 	unix_state_unlock(sk);
+	put_pid(old_pid);
+	if (old_cred)
+		put_cred(old_cred);
 out:
 	return err;
 }
@@ -1140,8 +1162,7 @@ restart:
 	unix_peer(newsk)	= sk;
 	newsk->sk_state		= TCP_ESTABLISHED;
 	newsk->sk_type		= sk->sk_type;
-	newsk->sk_peercred.pid	= task_tgid_vnr(current);
-	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
+	init_peercred(newsk);
 	newu = unix_sk(newsk);
 	newsk->sk_wq		= &newu->peer_wq;
 	otheru = unix_sk(other);
@@ -1157,7 +1178,7 @@ restart:
 	}
 
 	/* Set credentials */
-	sk->sk_peercred = other->sk_peercred;
+	copy_peercred(sk, other);
 
 	sock->state	= SS_CONNECTED;
 	sk->sk_state	= TCP_ESTABLISHED;
@@ -1199,10 +1220,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
 	sock_hold(skb);
 	unix_peer(ska) = skb;
 	unix_peer(skb) = ska;
-	ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
-	current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid);
-	ska->sk_peercred.uid = skb->sk_peercred.uid;
-	ska->sk_peercred.gid = skb->sk_peercred.gid;
+	init_peercred(ska);
+	init_peercred(skb);
 
 	if (ska->sk_type != SOCK_DGRAM) {
 		ska->sk_state = TCP_ESTABLISHED;
-- 
cgit v1.2.3-58-ga151


From b47030c71dfd6c8cd5cb6e551b6f7f7cfc96f6a6 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:31:06 +0000
Subject: af_netlink: Add needed scm_destroy after scm_send.

scm_send occasionally allocates state in the scm_cookie, so I have
modified netlink_sendmsg to guarantee that when scm_send succeeds
scm_destory will be called to free that state.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Reviewed-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index a2eb965207d3..7aeaa83193db 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1323,19 +1323,23 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
 
-	if (NULL == siocb->scm)
+	if (NULL == siocb->scm) {
 		siocb->scm = &scm;
+		memset(&scm, 0, sizeof(scm));
+	}
 	err = scm_send(sock, msg, siocb->scm);
 	if (err < 0)
 		return err;
 
 	if (msg->msg_namelen) {
+		err = -EINVAL;
 		if (addr->nl_family != AF_NETLINK)
-			return -EINVAL;
+			goto out;
 		dst_pid = addr->nl_pid;
 		dst_group = ffs(addr->nl_groups);
+		err =  -EPERM;
 		if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
-			return -EPERM;
+			goto out;
 	} else {
 		dst_pid = nlk->dst_pid;
 		dst_group = nlk->dst_group;
@@ -1387,6 +1391,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
 
 out:
+	scm_destroy(siocb->scm);
 	return err;
 }
 
-- 
cgit v1.2.3-58-ga151


From 257b5358b32f17e0603b6ff57b13610b0e02348f Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:32:34 +0000
Subject: scm: Capture the full credentials of the scm sender.

Start capturing not only the userspace pid, uid and gid values of the
sending process but also the struct pid and struct cred of the sending
process as well.

This is in preparation for properly supporting SCM_CREDENTIALS for
sockets that have different uid and/or pid namespaces at the different
ends.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Serge E. Hallyn <serge@hallyn.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/scm.h | 28 ++++++++++++++++++++++++----
 net/core/scm.c    | 24 ++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/scm.h b/include/net/scm.h
index 17d9d2e75ff1..31656506d967 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -19,6 +19,8 @@ struct scm_fp_list {
 };
 
 struct scm_cookie {
+	struct pid		*pid;		/* Skb credentials */
+	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
 	struct ucred		creds;		/* Skb credentials	*/
 #ifdef CONFIG_SECURITY_NETWORK
@@ -42,8 +44,27 @@ static __inline__ void unix_get_peersec_dgram(struct socket *sock, struct scm_co
 { }
 #endif /* CONFIG_SECURITY_NETWORK */
 
+static __inline__ void scm_set_cred(struct scm_cookie *scm,
+				    struct pid *pid, const struct cred *cred)
+{
+	scm->pid  = get_pid(pid);
+	scm->cred = get_cred(cred);
+	cred_to_ucred(pid, cred, &scm->creds);
+}
+
+static __inline__ void scm_destroy_cred(struct scm_cookie *scm)
+{
+	put_pid(scm->pid);
+	scm->pid  = NULL;
+
+	if (scm->cred)
+		put_cred(scm->cred);
+	scm->cred = NULL;
+}
+
 static __inline__ void scm_destroy(struct scm_cookie *scm)
 {
+	scm_destroy_cred(scm);
 	if (scm && scm->fp)
 		__scm_destroy(scm);
 }
@@ -51,10 +72,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm)
 static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
 			       struct scm_cookie *scm)
 {
-	struct task_struct *p = current;
-	scm->creds.uid = current_uid();
-	scm->creds.gid = current_gid();
-	scm->creds.pid = task_tgid_vnr(p);
+	scm_set_cred(scm, task_tgid(current), current_cred());
 	scm->fp = NULL;
 	unix_get_peersec_dgram(sock, scm);
 	if (msg->msg_controllen <= 0)
@@ -96,6 +114,8 @@ static __inline__ void scm_recv(struct socket *sock, struct msghdr *msg,
 	if (test_bit(SOCK_PASSCRED, &sock->flags))
 		put_cmsg(msg, SOL_SOCKET, SCM_CREDENTIALS, sizeof(scm->creds), &scm->creds);
 
+	scm_destroy_cred(scm);
+
 	scm_passec(sock, msg, scm);
 
 	if (!scm->fp)
diff --git a/net/core/scm.c b/net/core/scm.c
index b88f6f9d0b97..681c976307b5 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -170,6 +170,30 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 			err = scm_check_creds(&p->creds);
 			if (err)
 				goto error;
+
+			if (pid_vnr(p->pid) != p->creds.pid) {
+				struct pid *pid;
+				err = -ESRCH;
+				pid = find_get_pid(p->creds.pid);
+				if (!pid)
+					goto error;
+				put_pid(p->pid);
+				p->pid = pid;
+			}
+
+			if ((p->cred->euid != p->creds.uid) ||
+				(p->cred->egid != p->creds.gid)) {
+				struct cred *cred;
+				err = -ENOMEM;
+				cred = prepare_creds();
+				if (!cred)
+					goto error;
+
+				cred->uid = cred->euid = p->creds.uid;
+				cred->gid = cred->egid = p->creds.uid;
+				put_cred(p->cred);
+				p->cred = cred;
+			}
 			break;
 		default:
 			goto error;
-- 
cgit v1.2.3-58-ga151


From 7361c36c5224519b258219fe3d0e8abc865d8134 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:34:33 +0000
Subject: af_unix: Allow credentials to work across user and pid namespaces.

In unix_skb_parms store pointers to struct pid and struct cred instead
of raw uid, gid, and pid values, then translate the credentials on
reception into values that are meaningful in the receiving processes
namespaces.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/af_unix.h |  4 ++--
 net/unix/af_unix.c    | 53 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 20725e213aee..90c9e2872f27 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -23,7 +23,8 @@ struct unix_address {
 };
 
 struct unix_skb_parms {
-	struct ucred		creds;		/* Skb credentials	*/
+	struct pid		*pid;		/* Skb credentials	*/
+	const struct cred	*cred;
 	struct scm_fp_list	*fp;		/* Passed files		*/
 #ifdef CONFIG_SECURITY_NETWORK
 	u32			secid;		/* Security ID		*/
@@ -31,7 +32,6 @@ struct unix_skb_parms {
 };
 
 #define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
-#define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e1f1349fae86..5fe9d6fe08b8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1316,18 +1316,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 	int i;
 
 	scm->fp = UNIXCB(skb).fp;
-	skb->destructor = sock_wfree;
 	UNIXCB(skb).fp = NULL;
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_notinflight(scm->fp->fp[i]);
 }
 
-static void unix_destruct_fds(struct sk_buff *skb)
+static void unix_destruct_scm(struct sk_buff *skb)
 {
 	struct scm_cookie scm;
 	memset(&scm, 0, sizeof(scm));
-	unix_detach_fds(&scm, skb);
+	scm.pid  = UNIXCB(skb).pid;
+	scm.cred = UNIXCB(skb).cred;
+	if (UNIXCB(skb).fp)
+		unix_detach_fds(&scm, skb);
 
 	/* Alas, it calls VFS */
 	/* So fscking what? fput() had been SMP-safe since the last Summer */
@@ -1350,10 +1352,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 
 	for (i = scm->fp->count-1; i >= 0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	skb->destructor = unix_destruct_fds;
 	return 0;
 }
 
+static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
+{
+	int err = 0;
+	UNIXCB(skb).pid  = get_pid(scm->pid);
+	UNIXCB(skb).cred = get_cred(scm->cred);
+	UNIXCB(skb).fp = NULL;
+	if (scm->fp && send_fds)
+		err = unix_attach_fds(scm, skb);
+
+	skb->destructor = unix_destruct_scm;
+	return err;
+}
+
 /*
  *	Send AF_UNIX data.
  */
@@ -1410,12 +1424,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	if (skb == NULL)
 		goto out;
 
-	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp) {
-		err = unix_attach_fds(siocb->scm, skb);
-		if (err)
-			goto out_free;
-	}
+	err = unix_scm_to_skb(siocb->scm, skb, true);
+	if (err)
+		goto out_free;
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1585,16 +1596,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 */
 		size = min_t(int, size, skb_tailroom(skb));
 
-		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
+
 		/* Only send the fds in the first buffer */
-		if (siocb->scm->fp && !fds_sent) {
-			err = unix_attach_fds(siocb->scm, skb);
-			if (err) {
-				kfree_skb(skb);
-				goto out_err;
-			}
-			fds_sent = true;
+		err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
+		if (err) {
+			kfree_skb(skb);
+			goto out_err;
 		}
+		fds_sent = true;
 
 		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
 		if (err) {
@@ -1711,7 +1720,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 		siocb->scm = &tmp_scm;
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
 	}
-	siocb->scm->creds = *UNIXCREDS(skb);
+	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 	unix_set_secdata(siocb->scm, skb);
 
 	if (!(flags & MSG_PEEK)) {
@@ -1860,14 +1869,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if (memcmp(UNIXCREDS(skb), &siocb->scm->creds,
-				   sizeof(siocb->scm->creds)) != 0) {
+			if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
+			    (UNIXCB(skb).cred != siocb->scm->cred)) {
 				skb_queue_head(&sk->sk_receive_queue, skb);
 				break;
 			}
 		} else {
 			/* Copy credentials */
-			siocb->scm->creds = *UNIXCREDS(skb);
+			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
 			check_creds = 1;
 		}
 
-- 
cgit v1.2.3-58-ga151


From 6616f7888c6088324727363276f6de05a1dca6fc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 13 Jun 2010 03:35:48 +0000
Subject: af_unix: Allow connecting to sockets in other network namespaces.

Remove the restriction that only allows connecting to a unix domain
socket identified by unix path that is in the same network namespace.

Crossing network namespaces is always tricky and we did not support
this at first, because of a strict policy of don't mix the namespaces.
Later after Pavel proposed this we did not support this because no one
had performed the audit to make certain using unix domain sockets
across namespaces is safe.

What fundamentally makes connecting to af_unix sockets in other
namespaces is safe is that you have to have the proper permissions on
the unix domain socket inode that lives in the filesystem.  If you
want strict isolation you just don't create inodes where unfriendlys
can get at them, or with permissions that allow unfriendlys to open
them.  All nicely handled for us by the mount namespace and other
standard file system facilities.

I looked through unix domain sockets and they are a very controlled
environment so none of the work that goes on in dev_forward_skb to
make crossing namespaces safe appears needed, we are not loosing
controll of the skb and so do not need to set up the skb to look like
it is comming in fresh from the outside world.  Further the fields in
struct unix_skb_parms should not have any problems crossing network
namespaces.

Now that we handle SCM_CREDENTIALS in a way that gives useable values
across namespaces.  There does not appear to be any operational
problems with encouraging the use of unix domain sockets across
containers either.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Daniel Lezcano <daniel.lezcano@free.fr>
Acked-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/af_unix.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5fe9d6fe08b8..75ba48b0d12a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
 	return s;
 }
 
-static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+static struct sock *unix_find_socket_byinode(struct inode *i)
 {
 	struct sock *s;
 	struct hlist_node *node;
@@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 		struct dentry *dentry = unix_sk(s)->dentry;
 
-		if (!net_eq(sock_net(s), net))
-			continue;
-
 		if (dentry && dentry->d_inode == i) {
 			sock_hold(s);
 			goto found;
@@ -758,7 +755,7 @@ static struct sock *unix_find_other(struct net *net,
 		err = -ECONNREFUSED;
 		if (!S_ISSOCK(inode->i_mode))
 			goto put_fail;
-		u = unix_find_socket_byinode(net, inode);
+		u = unix_find_socket_byinode(inode);
 		if (!u)
 			goto put_fail;
 
-- 
cgit v1.2.3-58-ga151


From 3924773a5a82622167524bdd48799dc0452c57f8 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 16 Jun 2010 16:18:25 -0700
Subject: net: Export cred_to_ucred to modules.

AF_UNIX references this, and can be built as a module,
so...

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 0229d5566a46..fef2434b7c8c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -762,6 +762,7 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred,
 		ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid);
 	}
 }
+EXPORT_SYMBOL_GPL(cred_to_ucred);
 
 int sock_getsockopt(struct socket *sock, int level, int optname,
 		    char __user *optval, int __user *optlen)
-- 
cgit v1.2.3-58-ga151


From c68cd6cc21eb329c47ff020ff7412bf58176984e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 17 Jun 2010 06:12:26 +0200
Subject: netfilter: nf_nat: support user-specified SNAT rules in LOCAL_IN

2.6.34 introduced 'conntrack zones' to deal with cases where packets
from multiple identical networks are handled by conntrack/NAT. Packets
are looped through veth devices, during which they are NATed to private
addresses, after which they can continue normally through the stack
and possibly have NAT rules applied a second time.

This works well, but is needlessly complicated for cases where only
a single SNAT/DNAT mapping needs to be applied to these packets. In that
case, all that needs to be done is to assign each network to a seperate
zone and perform NAT as usual. However this doesn't work for packets
destined for the machine performing NAT itself since its corrently not
possible to configure SNAT mappings for the LOCAL_IN chain.

This patch adds a new INPUT chain to the NAT table and changes the
targets performing SNAT to be usable in that chain.

Example usage with two identical networks (192.168.0.0/24) on eth0/eth1:

iptables -t raw -A PREROUTING -i eth0 -j CT --zone 1
iptables -t raw -A PREROUTING -i eth0 -j MARK --set-mark 1
iptables -t raw -A PREROUTING -i eth1 -j CT --zone 2
iptabels -t raw -A PREROUTING -i eth1 -j MARK --set-mark 2

iptables -t nat -A INPUT       -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A POSTROUTING -m mark --mark 1 -j NETMAP --to 10.0.0.0/24
iptables -t nat -A INPUT       -m mark --mark 2 -j NETMAP --to 10.0.1.0/24
iptables -t nat -A POSTROUTING -m mark --mark 2 -j NETMAP --to 10.0.1.0/24

iptables -t raw -A PREROUTING -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A OUTPUT     -d 10.0.0.0/24 -j CT --zone 1
iptables -t raw -A PREROUTING -d 10.0.1.0/24 -j CT --zone 2
iptables -t raw -A OUTPUT     -d 10.0.1.0/24 -j CT --zone 2

iptables -t nat -A PREROUTING -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.0.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A PREROUTING -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24
iptables -t nat -A OUTPUT     -d 10.0.1.0/24 -j NETMAP --to 192.168.0.0/24

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_nat_rule.h    |  2 --
 net/ipv4/netfilter/ipt_NETMAP.c        |  6 ++++--
 net/ipv4/netfilter/nf_nat_rule.c       | 10 ++++++----
 net/ipv4/netfilter/nf_nat_standalone.c |  8 +-------
 4 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/netfilter/nf_nat_rule.h b/include/net/netfilter/nf_nat_rule.h
index e4a18ae361c6..2890bdc4cd92 100644
--- a/include/net/netfilter/nf_nat_rule.h
+++ b/include/net/netfilter/nf_nat_rule.h
@@ -12,6 +12,4 @@ extern int nf_nat_rule_find(struct sk_buff *skb,
 			    const struct net_device *out,
 			    struct nf_conn *ct);
 
-extern unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum);
 #endif /* _NF_NAT_RULE_H */
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index f43867d1697f..6cdb298f1035 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -48,7 +48,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
-		     par->hooknum == NF_INET_LOCAL_OUT);
+		     par->hooknum == NF_INET_LOCAL_OUT ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 	ct = nf_ct_get(skb, &ctinfo);
 
 	netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
@@ -77,7 +78,8 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
-			  (1 << NF_INET_LOCAL_OUT),
+			  (1 << NF_INET_LOCAL_OUT) |
+			  (1 << NF_INET_LOCAL_IN),
 	.checkentry 	= netmap_tg_check,
 	.me 		= THIS_MODULE
 };
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 98ed78281aee..ebbd319f62f5 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -28,7 +28,8 @@
 
 #define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
 			 (1 << NF_INET_POST_ROUTING) | \
-			 (1 << NF_INET_LOCAL_OUT))
+			 (1 << NF_INET_LOCAL_OUT) | \
+			 (1 << NF_INET_LOCAL_IN))
 
 static const struct xt_table nat_table = {
 	.name		= "nat",
@@ -45,7 +46,8 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	enum ip_conntrack_info ctinfo;
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
-	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
+	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
+		     par->hooknum == NF_INET_LOCAL_IN);
 
 	ct = nf_ct_get(skb, &ctinfo);
 
@@ -99,7 +101,7 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 	return 0;
 }
 
-unsigned int
+static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
@@ -141,7 +143,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 	.target		= ipt_snat_target,
 	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
 	.table		= "nat",
-	.hooks		= 1 << NF_INET_POST_ROUTING,
+	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
 	.family		= AF_INET,
 };
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 6723c682250d..95481fee8bdb 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -131,13 +131,7 @@ nf_nat_fn(unsigned int hooknum,
 		if (!nf_nat_initialized(ct, maniptype)) {
 			unsigned int ret;
 
-			if (hooknum == NF_INET_LOCAL_IN)
-				/* LOCAL_IN hook doesn't have a chain!  */
-				ret = alloc_null_binding(ct, hooknum);
-			else
-				ret = nf_nat_rule_find(skb, hooknum, in, out,
-						       ct);
-
+			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
 			if (ret != NF_ACCEPT)
 				return ret;
 		} else
-- 
cgit v1.2.3-58-ga151


From 25442e06d20aaba7d7b16438078a562b3e4cf19b Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 15 Jun 2010 06:14:12 +0000
Subject: bridge: fdb cleanup runs too often

It is common in end-node, non STP bridges to set forwarding
delay to zero; which causes the forwarding database cleanup
to run every clock tick. Change to run only as soon as needed
or at next ageing timer interval which ever is sooner.

Use round_jiffies_up macro rather than attempting round up
by changing value.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 26637439965b..b01dde35a69e 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -128,7 +128,7 @@ void br_fdb_cleanup(unsigned long _data)
 {
 	struct net_bridge *br = (struct net_bridge *)_data;
 	unsigned long delay = hold_time(br);
-	unsigned long next_timer = jiffies + br->forward_delay;
+	unsigned long next_timer = jiffies + br->ageing_time;
 	int i;
 
 	spin_lock_bh(&br->hash_lock);
@@ -149,9 +149,7 @@ void br_fdb_cleanup(unsigned long _data)
 	}
 	spin_unlock_bh(&br->hash_lock);
 
-	/* Add HZ/4 to ensure we round the jiffies upwards to be after the next
-	 * timer, otherwise we might round down and will have no-op run. */
-	mod_timer(&br->gc_timer, round_jiffies(next_timer + HZ/4));
+	mod_timer(&br->gc_timer, round_jiffies_up(next_timer));
 }
 
 /* Completely flush all dynamic entries in forwarding database.*/
-- 
cgit v1.2.3-58-ga151


From b1c74247b9e29ae3bfdf133862328c309bc9cf14 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:38 +0000
Subject: caif: Bugfix not all services uses flow-ctrl.

Flow control is not used by all CAIF services.
The usage of flow control is now part of the gerneal
initialization function for CAIF Services.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfsrvl.h |  6 ++++--
 net/caif/cfctrl.c         |  2 +-
 net/caif/cfdbgl.c         |  2 +-
 net/caif/cfdgml.c         |  2 +-
 net/caif/cfrfml.c         | 12 ++++--------
 net/caif/cfsrvl.c         | 14 ++++++++++++--
 net/caif/cfutill.c        |  2 +-
 net/caif/cfveil.c         |  2 +-
 net/caif/cfvidl.c         |  2 +-
 9 files changed, 26 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 2dc9eb193ecf..5d921f5e18c4 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -16,6 +16,7 @@ struct cfsrvl {
 	bool open;
 	bool phy_flow_on;
 	bool modem_flow_on;
+	bool supports_flowctrl;
 	struct dev_info dev_info;
 	struct kref ref;
 };
@@ -30,8 +31,9 @@ struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
 bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
 void cfservl_destroy(struct cflayer *layer);
 void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info);
+			u8 channel_id,
+			struct dev_info *dev_info,
+			bool supports_flowctrl);
 bool cfsrvl_ready(struct cfsrvl *service, int *err);
 u8 cfsrvl_getphyid(struct cflayer *layer);
 
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index fcfda98a5e6d..107c4b2a311e 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -43,7 +43,7 @@ struct cflayer *cfctrl_create(void)
 	memset(&dev_info, 0, sizeof(dev_info));
 	dev_info.id = 0xff;
 	memset(this, 0, sizeof(*this));
-	cfsrvl_init(&this->serv, 0, &dev_info);
+	cfsrvl_init(&this->serv, 0, &dev_info, false);
 	atomic_set(&this->req_seq_no, 1);
 	atomic_set(&this->rsp_seq_no, 1);
 	this->serv.layer.receive = cfctrl_recv;
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index ab6b6dc34cf8..676648cac8dd 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -22,7 +22,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(dbg, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(dbg, channel_id, dev_info);
+	cfsrvl_init(dbg, channel_id, dev_info, false);
 	dbg->layer.receive = cfdbgl_receive;
 	dbg->layer.transmit = cfdbgl_transmit;
 	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 53194840ecb6..32d9f0dc8463 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -30,7 +30,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(dgm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(dgm, channel_id, dev_info);
+	cfsrvl_init(dgm, channel_id, dev_info, true);
 	dgm->layer.receive = cfdgml_receive;
 	dgm->layer.transmit = cfdgml_transmit;
 	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index fd27b172fb5d..689cbfd0e43d 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -23,30 +23,26 @@
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl);
 
 struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
 {
 	struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+
 	if (!rfm) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
+
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+
 	memset(rfm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(rfm, channel_id, dev_info);
-	rfm->layer.modemcmd = cfservl_modemcmd;
+	cfsrvl_init(rfm, channel_id, dev_info, false);
 	rfm->layer.receive = cfrfml_receive;
 	rfm->layer.transmit = cfrfml_transmit;
 	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
 	return &rfm->layer;
 }
 
-static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
-{
-       return -EPROTO;
-}
-
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
 {
 	u8 tmp;
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 6e5b7079a684..7aa1f03a0151 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -24,8 +24,10 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 				int phyid)
 {
 	struct cfsrvl *service = container_obj(layr);
+
 	caif_assert(layr->up != NULL);
 	caif_assert(layr->up->ctrlcmd != NULL);
+
 	switch (ctrl) {
 	case CAIF_CTRLCMD_INIT_RSP:
 		service->open = true;
@@ -89,9 +91,14 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
 {
 	struct cfsrvl *service = container_obj(layr);
+
 	caif_assert(layr != NULL);
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
+
+	if (!service->supports_flowctrl)
+		return 0;
+
 	switch (ctrl) {
 	case CAIF_MODEMCMD_FLOW_ON_REQ:
 		{
@@ -153,8 +160,10 @@ void cfservl_destroy(struct cflayer *layer)
 }
 
 void cfsrvl_init(struct cfsrvl *service,
-		 u8 channel_id,
-		 struct dev_info *dev_info)
+			u8 channel_id,
+			struct dev_info *dev_info,
+			bool supports_flowctrl
+			)
 {
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	service->open = false;
@@ -164,6 +173,7 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.ctrlcmd = cfservl_ctrlcmd;
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
+	service->supports_flowctrl = supports_flowctrl;
 	kref_init(&service->ref);
 }
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 5fd2c9ea8b42..ce525cac9064 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -31,7 +31,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(util, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(util, channel_id, dev_info);
+	cfsrvl_init(util, channel_id, dev_info, true);
 	util->layer.receive = cfutill_receive;
 	util->layer.transmit = cfutill_transmit;
 	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index e04f7d964e83..637cb0eee13c 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -30,7 +30,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
 	}
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 	memset(vei, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(vei, channel_id, dev_info);
+	cfsrvl_init(vei, channel_id, dev_info, true);
 	vei->layer.receive = cfvei_receive;
 	vei->layer.transmit = cfvei_transmit;
 	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index 89ad4ea239f1..ada6ee2d48f5 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -27,7 +27,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
 	caif_assert(offsetof(struct cfsrvl, layer) == 0);
 
 	memset(vid, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(vid, channel_id, dev_info);
+	cfsrvl_init(vid, channel_id, dev_info, false);
 	vid->layer.receive = cfvidl_receive;
 	vid->layer.transmit = cfvidl_transmit;
 	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
-- 
cgit v1.2.3-58-ga151


From a7da1f55a826c621251874e7684c234972fc3216 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:39 +0000
Subject: caif: Bugfix - RFM must support segmentation.

CAIF Remote File Manager may send or receive more than 4050 bytes.
Due to this The CAIF RFM service have to support segmentation.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfsrvl.h |   9 +-
 net/caif/caif_socket.c    |   4 -
 net/caif/cfcnfg.c         |   4 +-
 net/caif/cfpkt_skbuff.c   |   1 -
 net/caif/cfrfml.c         | 314 ++++++++++++++++++++++++++++++++++++++--------
 net/caif/cfsrvl.c         |  13 +-
 6 files changed, 278 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 5d921f5e18c4..b1fa87ee0992 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -17,6 +17,7 @@ struct cfsrvl {
 	bool phy_flow_on;
 	bool modem_flow_on;
 	bool supports_flowctrl;
+	void (*release)(struct kref *);
 	struct dev_info dev_info;
 	struct kref ref;
 };
@@ -26,7 +27,8 @@ struct cflayer *cfvei_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfdgml_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfutill_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfvidl_create(u8 linkid, struct dev_info *dev_info);
-struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info);
+struct cflayer *cfrfml_create(u8 linkid, struct dev_info *dev_info,
+				int mtu_size);
 struct cflayer *cfdbgl_create(u8 linkid, struct dev_info *dev_info);
 bool cfsrvl_phyid_match(struct cflayer *layer, int phyid);
 void cfservl_destroy(struct cflayer *layer);
@@ -52,7 +54,10 @@ static inline void cfsrvl_put(struct cflayer *layr)
 	if (layr == NULL)
 		return;
 	s = container_of(layr, struct cfsrvl, layer);
-	kref_put(&s->ref, cfsrvl_release);
+
+	WARN_ON(!s->release);
+	if (s->release)
+		kref_put(&s->ref, s->release);
 }
 
 #endif				/* CFSRVL_H_ */
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 791249316ef3..848ae755cdd6 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -596,10 +596,6 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
 
-	ret = -EMSGSIZE;
-	if (buffer_size > CAIF_MAX_PAYLOAD_SIZE)
-		goto err;
-
 	timeo = sock_sndtimeo(sk, noblock);
 	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
 				1, timeo, &ret);
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 7c81974a45c4..cff2dcb9efe4 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -22,6 +22,7 @@
 #define PHY_NAME_LEN 20
 
 #define container_obj(layr) container_of(layr, struct cfcnfg, layer)
+#define RFM_FRAGMENT_SIZE 4030
 
 /* Information about CAIF physical interfaces held by Config Module in order
  * to manage physical interfaces
@@ -328,7 +329,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
-		servicel = cfrfml_create(channel_id, &phyinfo->dev_info);
+		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
+						RFM_FRAGMENT_SIZE);
 		break;
 	case CFCTRL_SRV_UTIL:
 		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index a6fdf899741a..318b0f4b416e 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -338,7 +338,6 @@ struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
 	u16 dstlen;
 	u16 createlen;
 	if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
-		cfpkt_destroy(addpkt);
 		return dstpkt;
 	}
 	if (expectlen > addlen)
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 689cbfd0e43d..4b04d25b6a3f 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -7,98 +7,304 @@
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/unaligned/le_byteshift.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfsrvl.h>
 #include <net/caif/cfpkt.h>
 
-#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
-
+#define container_obj(layr) container_of(layr, struct cfrfml, serv.layer)
 #define RFM_SEGMENTATION_BIT 0x01
-#define RFM_PAYLOAD  0x00
-#define RFM_CMD_BIT  0x80
-#define RFM_FLOW_OFF 0x81
-#define RFM_FLOW_ON  0x80
-#define RFM_SET_PIN  0x82
-#define RFM_CTRL_PKT_SIZE 1
+#define RFM_HEAD_SIZE 7
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
 
-struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
+struct cfrfml {
+	struct cfsrvl serv;
+	struct cfpkt *incomplete_frm;
+	int fragment_size;
+	u8  seghead[6];
+	u16 pdu_size;
+	/* Protects serialized processing of packets */
+	spinlock_t sync;
+};
+
+static void cfrfml_release(struct kref *kref)
+{
+	struct cfsrvl *srvl = container_of(kref, struct cfsrvl, ref);
+	struct cfrfml *rfml = container_obj(&srvl->layer);
+
+	if (rfml->incomplete_frm)
+		cfpkt_destroy(rfml->incomplete_frm);
+
+	kfree(srvl);
+}
+
+struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
+					int mtu_size)
 {
-	struct cfsrvl *rfm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+	int tmp;
+	struct cfrfml *this =
+		kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
 
-	if (!rfm) {
+	if (!this) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
 
-	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	cfsrvl_init(&this->serv, channel_id, dev_info, false);
+	this->serv.release = cfrfml_release;
+	this->serv.layer.receive = cfrfml_receive;
+	this->serv.layer.transmit = cfrfml_transmit;
+
+	/* Round down to closest multiple of 16 */
+	tmp = (mtu_size - RFM_HEAD_SIZE - 6) / 16;
+	tmp *= 16;
+
+	this->fragment_size = tmp;
+	spin_lock_init(&this->sync);
+	snprintf(this->serv.layer.name, CAIF_LAYER_NAME_SZ,
+		"rfm%d", channel_id);
+
+	return &this->serv.layer;
+}
+
+static struct cfpkt *rfm_append(struct cfrfml *rfml, char *seghead,
+			struct cfpkt *pkt, int *err)
+{
+	struct cfpkt *tmppkt;
+	*err = -EPROTO;
+	/* n-th but not last segment */
+
+	if (cfpkt_extr_head(pkt, seghead, 6) < 0)
+		return NULL;
+
+	/* Verify correct header */
+	if (memcmp(seghead, rfml->seghead, 6) != 0)
+		return NULL;
+
+	tmppkt = cfpkt_append(rfml->incomplete_frm, pkt,
+			rfml->pdu_size + RFM_HEAD_SIZE);
+
+	/* If cfpkt_append failes input pkts are not freed */
+	*err = -ENOMEM;
+	if (tmppkt == NULL)
+		return NULL;
 
-	memset(rfm, 0, sizeof(struct cfsrvl));
-	cfsrvl_init(rfm, channel_id, dev_info, false);
-	rfm->layer.receive = cfrfml_receive;
-	rfm->layer.transmit = cfrfml_transmit;
-	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
-	return &rfm->layer;
+	*err = 0;
+	return tmppkt;
 }
 
 static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
 {
 	u8 tmp;
 	bool segmented;
-	int ret;
+	int err;
+	u8 seghead[6];
+	struct cfrfml *rfml;
+	struct cfpkt *tmppkt = NULL;
+
 	caif_assert(layr->up != NULL);
 	caif_assert(layr->receive != NULL);
+	rfml = container_obj(layr);
+	spin_lock(&rfml->sync);
+
+	err = -EPROTO;
+	if (cfpkt_extr_head(pkt, &tmp, 1) < 0)
+		goto out;
+	segmented = tmp & RFM_SEGMENTATION_BIT;
+
+	if (segmented) {
+		if (rfml->incomplete_frm == NULL) {
+			/* Initial Segment */
+			if (cfpkt_peek_head(pkt, rfml->seghead, 6) < 0)
+				goto out;
+
+			rfml->pdu_size = get_unaligned_le16(rfml->seghead+4);
+
+			if (cfpkt_erroneous(pkt))
+				goto out;
+			rfml->incomplete_frm = pkt;
+			pkt = NULL;
+		} else {
+
+			tmppkt = rfm_append(rfml, seghead, pkt, &err);
+			if (tmppkt == NULL)
+				goto out;
+
+			if (cfpkt_erroneous(tmppkt))
+				goto out;
+
+			rfml->incomplete_frm = tmppkt;
+
+
+			if (cfpkt_erroneous(tmppkt))
+				goto out;
+		}
+		err = 0;
+		goto out;
+	}
+
+	if (rfml->incomplete_frm) {
+
+		/* Last Segment */
+		tmppkt = rfm_append(rfml, seghead, pkt, &err);
+		if (tmppkt == NULL)
+			goto out;
+
+		if (cfpkt_erroneous(tmppkt))
+			goto out;
+
+		rfml->incomplete_frm = NULL;
+		pkt = tmppkt;
+		tmppkt = NULL;
+
+		/* Verify that length is correct */
+		err = EPROTO;
+		if (rfml->pdu_size != cfpkt_getlen(pkt) - RFM_HEAD_SIZE + 1)
+			goto out;
+	}
+
+	err = rfml->serv.layer.up->receive(rfml->serv.layer.up, pkt);
+
+out:
+
+	if (err != 0) {
+		if (tmppkt)
+			cfpkt_destroy(tmppkt);
+		if (pkt)
+			cfpkt_destroy(pkt);
+		if (rfml->incomplete_frm)
+			cfpkt_destroy(rfml->incomplete_frm);
+		rfml->incomplete_frm = NULL;
+
+		pr_info("CAIF: %s(): "
+				"Connection error %d triggered on RFM link\n",
+				__func__, err);
+
+		/* Trigger connection error upon failure.*/
+		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
+					rfml->serv.dev_info.id);
+	}
+	spin_unlock(&rfml->sync);
+	return err;
+}
+
+
+static int cfrfml_transmit_segment(struct cfrfml *rfml, struct cfpkt *pkt)
+{
+	caif_assert(!cfpkt_getlen(pkt) < rfml->fragment_size);
+
+	/* Add info for MUX-layer to route the packet out. */
+	cfpkt_info(pkt)->channel_id = rfml->serv.layer.id;
 
 	/*
-	 * RFM is taking care of segmentation and stripping of
-	 * segmentation bit.
+	 * To optimize alignment, we add up the size of CAIF header before
+	 * payload.
 	 */
-	if (cfpkt_extr_head(pkt, &tmp, 1) < 0) {
-		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
-		cfpkt_destroy(pkt);
-		return -EPROTO;
-	}
-	segmented = tmp & RFM_SEGMENTATION_BIT;
-	caif_assert(!segmented);
+	cfpkt_info(pkt)->hdr_len = RFM_HEAD_SIZE;
+	cfpkt_info(pkt)->dev_info = &rfml->serv.dev_info;
 
-	ret = layr->up->receive(layr->up, pkt);
-	return ret;
+	return rfml->serv.layer.dn->transmit(rfml->serv.layer.dn, pkt);
 }
 
 static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 {
-	u8 tmp = 0;
-	int ret;
-	struct cfsrvl *service = container_obj(layr);
+	int err;
+	u8 seg;
+	u8 head[6];
+	struct cfpkt *rearpkt = NULL;
+	struct cfpkt *frontpkt = pkt;
+	struct cfrfml *rfml = container_obj(layr);
 
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
 
-	if (!cfsrvl_ready(service, &ret))
-		return ret;
+	if (!cfsrvl_ready(&rfml->serv, &err))
+		return err;
+
+	err = -EPROTO;
+	if (cfpkt_getlen(pkt) <= RFM_HEAD_SIZE-1)
+		goto out;
+
+	err = 0;
+	if (cfpkt_getlen(pkt) > rfml->fragment_size + RFM_HEAD_SIZE)
+		err = cfpkt_peek_head(pkt, head, 6);
+
+	if (err < 0)
+		goto out;
+
+	while (cfpkt_getlen(frontpkt) > rfml->fragment_size + RFM_HEAD_SIZE) {
+
+		seg = 1;
+		err = -EPROTO;
+
+		if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
+			goto out;
+		/*
+		 * On OOM error cfpkt_split returns NULL.
+		 *
+		 * NOTE: Segmented pdu is not correctly aligned.
+		 * This has negative performance impact.
+		 */
+
+		rearpkt = cfpkt_split(frontpkt, rfml->fragment_size);
+		if (rearpkt == NULL)
+			goto out;
+
+		err = cfrfml_transmit_segment(rfml, frontpkt);
+
+		if (err != 0)
+			goto out;
+		frontpkt = rearpkt;
+		rearpkt = NULL;
+
+		err = -ENOMEM;
+		if (frontpkt == NULL)
+			goto out;
+		err = -EPROTO;
+		if (cfpkt_add_head(frontpkt, head, 6) < 0)
+			goto out;
 
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_err("CAIF: %s():Packet too large - size=%d\n",
-			__func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
 	}
-	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
-		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
-		return -EPROTO;
+
+	seg = 0;
+	err = -EPROTO;
+
+	if (cfpkt_add_head(frontpkt, &seg, 1) < 0)
+		goto out;
+
+	err = cfrfml_transmit_segment(rfml, frontpkt);
+
+	frontpkt = NULL;
+out:
+
+	if (err != 0) {
+		pr_info("CAIF: %s(): "
+				"Connection error %d triggered on RFM link\n",
+				__func__, err);
+		/* Trigger connection error upon failure.*/
+
+		layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
+					rfml->serv.dev_info.id);
+
+		if (rearpkt)
+			cfpkt_destroy(rearpkt);
+
+		if (frontpkt && frontpkt != pkt) {
+
+			cfpkt_destroy(frontpkt);
+			/*
+			 * Socket layer will free the original packet,
+			 * but this packet may already be sent and
+			 * freed. So we have to return 0 in this case
+			 * to avoid socket layer to re-free this packet.
+			 * The return of shutdown indication will
+			 * cause connection to be invalidated anyhow.
+			 */
+			err = 0;
+		}
 	}
 
-	/* Add info for MUX-layer to route the packet out. */
-	cfpkt_info(pkt)->channel_id = service->layer.id;
-	/*
-	 * To optimize alignment, we add up the size of CAIF header before
-	 * payload.
-	 */
-	cfpkt_info(pkt)->hdr_len = 1;
-	cfpkt_info(pkt)->dev_info = &service->dev_info;
-	ret = layr->dn->transmit(layr->dn, pkt);
-	if (ret < 0)
-		cfpkt_extr_head(pkt, &tmp, 1);
-	return ret;
+	return err;
 }
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 7aa1f03a0151..4c9f147c38af 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -159,6 +159,13 @@ void cfservl_destroy(struct cflayer *layer)
 	kfree(layer);
 }
 
+void cfsrvl_release(struct kref *kref)
+{
+	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
+	pr_info("CAIF: %s(): enter\n", __func__);
+	kfree(service);
+}
+
 void cfsrvl_init(struct cfsrvl *service,
 			u8 channel_id,
 			struct dev_info *dev_info,
@@ -174,14 +181,10 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
 	service->supports_flowctrl = supports_flowctrl;
+	service->release = cfsrvl_release;
 	kref_init(&service->ref);
 }
 
-void cfsrvl_release(struct kref *kref)
-{
-	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
-	kfree(service);
-}
 
 bool cfsrvl_ready(struct cfsrvl *service, int *err)
 {
-- 
cgit v1.2.3-58-ga151


From 2aa40aef9debc77d55cc87a50d335b6fe97fbeb0 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:40 +0000
Subject: caif: Use link layer MTU instead of fixed MTU

Previously CAIF supported maximum transfer size of ~4050.
The transfer size is now calculated dynamically based on the
link layers mtu size.

Signed-off-by: Sjur Braendeland@stericsson.com
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/caif_serial.c |  1 -
 include/net/caif/caif_dev.h    |  8 +++--
 include/net/caif/caif_layer.h  |  6 ----
 include/net/caif/cfcnfg.h      | 16 +++++++---
 net/caif/caif_dev.c            | 12 +++++---
 net/caif/caif_socket.c         | 54 ++++++++++++++++++++++------------
 net/caif/cfcnfg.c              | 44 ++++++++++++++++++++++++---
 net/caif/cfctrl.c              |  6 ++--
 net/caif/cfdgml.c              |  5 ++++
 net/caif/cfpkt_skbuff.c        |  4 +--
 net/caif/cfserl.c              |  7 +++--
 net/caif/cfsrvl.c              |  1 -
 net/caif/cfutill.c             |  6 ----
 net/caif/cfveil.c              |  5 ----
 net/caif/chnl_net.c            | 67 +++++++++++++++++++++++++++++++++++-------
 15 files changed, 173 insertions(+), 69 deletions(-)

(limited to 'net')

diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 3e706f00a0d3..3df0c0f8b8bf 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -403,7 +403,6 @@ static void caifdev_setup(struct net_device *dev)
 	dev->type = ARPHRD_CAIF;
 	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu = CAIF_MAX_MTU;
-	dev->hard_header_len = CAIF_NEEDED_HEADROOM;
 	dev->tx_queue_len = 0;
 	dev->destructor = free_netdev;
 	skb_queue_head_init(&serdev->head);
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 318ab9478a44..6da573c75d54 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -50,6 +50,9 @@ struct caif_connect_request {
  * @client_layer:	User implementation of client layer. This layer
  *			MUST have receive and control callback functions
  *			implemented.
+ * @ifindex:		Link layer interface index used for this connection.
+ * @headroom:		Head room needed by CAIF protocol.
+ * @tailroom:		Tail room needed by CAIF protocol.
  *
  * This function connects a CAIF channel. The Client must implement
  * the struct cflayer. This layer represents the Client layer and holds
@@ -59,8 +62,9 @@ struct caif_connect_request {
  * E.g. CAIF Socket will call this function for each socket it connects
  * and have one client_layer instance for each socket.
  */
-int caif_connect_client(struct caif_connect_request *config,
-			   struct cflayer *client_layer);
+int caif_connect_client(struct caif_connect_request *conn_req,
+			struct cflayer *client_layer, int *ifindex,
+			int *headroom, int *tailroom);
 
 /**
  * caif_disconnect_client - Disconnects a client from the CAIF stack.
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 25c472f0e5b8..c8b07a904e78 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -15,14 +15,8 @@ struct cfpktq;
 struct caif_payload_info;
 struct caif_packet_funcs;
 
-#define CAIF_MAX_FRAMESIZE 4096
-#define CAIF_MAX_PAYLOAD_SIZE (4096 - 64)
-#define CAIF_NEEDED_HEADROOM (10)
-#define CAIF_NEEDED_TAILROOM (2)
 
 #define CAIF_LAYER_NAME_SZ 16
-#define CAIF_SUCCESS	1
-#define CAIF_FAILURE	0
 
 /**
  * caif_assert() - Assert function for CAIF.
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 9fc2fc20b884..bd646faffa47 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -7,6 +7,7 @@
 #ifndef CFCNFG_H_
 #define CFCNFG_H_
 #include <linux/spinlock.h>
+#include <linux/netdevice.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfctrl.h>
 
@@ -73,8 +74,8 @@ void cfcnfg_remove(struct cfcnfg *cfg);
 
 void
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
-		     void *dev, struct cflayer *phy_layer, u16 *phyid,
-		     enum cfcnfg_phy_preference pref,
+		     struct net_device *dev, struct cflayer *phy_layer,
+		     u16 *phyid, enum cfcnfg_phy_preference pref,
 		     bool fcs, bool stx);
 
 /**
@@ -114,11 +115,18 @@ void cfcnfg_release_adap_layer(struct cflayer *adap_layer);
  * @param:		Link setup parameters.
  * @adap_layer:		Specify the adaptation layer; the receive and
  *			flow-control functions MUST be set in the structure.
- *
+ * @ifindex:		Link layer interface index used for this connection.
+ * @proto_head:		Protocol head-space needed by CAIF protocol,
+ *			excluding link layer.
+ * @proto_tail:		Protocol tail-space needed by CAIF protocol,
+ *			excluding link layer.
  */
 int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 			    struct cfctrl_link_param *param,
-			    struct cflayer *adap_layer);
+			    struct cflayer *adap_layer,
+			    int *ifindex,
+			    int *proto_head,
+			    int *proto_tail);
 
 /**
  * cfcnfg_get_phyid() - Get physical ID, given type.
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e2b86f1f5a47..0b586e9d1378 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -255,7 +255,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 			pref = CFPHYPREF_HIGH_BW;
 			break;
 		}
-
+		dev_hold(dev);
 		cfcnfg_add_phy_layer(get_caif_conf(),
 				     phy_type,
 				     dev,
@@ -285,6 +285,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		caifd->layer.up->ctrlcmd(caifd->layer.up,
 					 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
 					 caifd->layer.id);
+		might_sleep();
 		res = wait_event_interruptible_timeout(caifd->event,
 					atomic_read(&caifd->in_use) == 0,
 					TIMEOUT);
@@ -300,6 +301,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 				   "Unregistering an active CAIF device: %s\n",
 				   __func__, dev->name);
 		cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
+		dev_put(dev);
 		atomic_set(&caifd->state, what);
 		break;
 
@@ -326,7 +328,8 @@ struct cfcnfg *get_caif_conf(void)
 EXPORT_SYMBOL(get_caif_conf);
 
 int caif_connect_client(struct caif_connect_request *conn_req,
-			   struct cflayer *client_layer)
+			struct cflayer *client_layer, int *ifindex,
+			int *headroom, int *tailroom)
 {
 	struct cfctrl_link_param param;
 	int ret;
@@ -334,8 +337,9 @@ int caif_connect_client(struct caif_connect_request *conn_req,
 	if (ret)
 		return ret;
 	/* Hook up the adaptation layer. */
-	return cfcnfg_add_adaptation_layer(get_caif_conf(),
-						&param, client_layer);
+	return cfcnfg_add_adaptation_layer(get_caif_conf(), &param,
+					client_layer, ifindex,
+					headroom, tailroom);
 }
 EXPORT_SYMBOL(caif_connect_client);
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 848ae755cdd6..8ce904786116 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -28,8 +28,8 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(AF_CAIF);
 
-#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10)
-#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100)
+#define CAIF_DEF_SNDBUF (4096*10)
+#define CAIF_DEF_RCVBUF (4096*100)
 
 /*
  * CAIF state is re-using the TCP socket states.
@@ -76,6 +76,7 @@ struct caifsock {
 	struct caif_connect_request conn_req;
 	struct mutex readlock;
 	struct dentry *debugfs_socket_dir;
+	int headroom, tailroom, maxframe;
 };
 
 static int rx_flow_is_on(struct caifsock *cf_sk)
@@ -594,23 +595,32 @@ static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		goto err;
 	noblock = msg->msg_flags & MSG_DONTWAIT;
 
-	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
-
 	timeo = sock_sndtimeo(sk, noblock);
 	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
 				1, timeo, &ret);
 
+	if (ret)
+		goto err;
 	ret = -EPIPE;
 	if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
 		sock_flag(sk, SOCK_DEAD) ||
 		(sk->sk_shutdown & RCV_SHUTDOWN))
 		goto err;
 
+	/* Error if trying to write more than maximum frame size. */
+	ret = -EMSGSIZE;
+	if (len > cf_sk->maxframe && cf_sk->sk.sk_protocol != CAIFPROTO_RFM)
+		goto err;
+
+	buffer_size = len + cf_sk->headroom + cf_sk->tailroom;
+
 	ret = -ENOMEM;
 	skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
-	if (!skb)
+
+	if (!skb || skb_tailroom(skb) < buffer_size)
 		goto err;
-	skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+
+	skb_reserve(skb, cf_sk->headroom);
 
 	ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
 
@@ -641,7 +651,6 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	long timeo;
 
 	err = -EOPNOTSUPP;
-
 	if (unlikely(msg->msg_flags&MSG_OOB))
 		goto out_err;
 
@@ -658,8 +667,8 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 
 		size = len-sent;
 
-		if (size > CAIF_MAX_PAYLOAD_SIZE)
-			size = CAIF_MAX_PAYLOAD_SIZE;
+		if (size > cf_sk->maxframe)
+			size = cf_sk->maxframe;
 
 		/* If size is more than half of sndbuf, chop up message */
 		if (size > ((sk->sk_sndbuf >> 1) - 64))
@@ -669,14 +678,14 @@ static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			size = SKB_MAX_ALLOC;
 
 		skb = sock_alloc_send_skb(sk,
-					size + CAIF_NEEDED_HEADROOM
-					+ CAIF_NEEDED_TAILROOM,
+					size + cf_sk->headroom +
+					cf_sk->tailroom,
 					msg->msg_flags&MSG_DONTWAIT,
 					&err);
 		if (skb == NULL)
 			goto out_err;
 
-		skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+		skb_reserve(skb, cf_sk->headroom);
 		/*
 		 *	If you pass two values to the sock_alloc_send_skb
 		 *	it tries to grab the large buffer with GFP_NOFS
@@ -817,17 +826,15 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	long timeo;
 	int err;
+	int ifindex, headroom, tailroom;
+	struct net_device *dev;
+
 	lock_sock(sk);
 
 	err = -EAFNOSUPPORT;
 	if (uaddr->sa_family != AF_CAIF)
 		goto out;
 
-	err = -ESOCKTNOSUPPORT;
-	if (unlikely(!(sk->sk_type == SOCK_STREAM &&
-		       cf_sk->sk.sk_protocol == CAIFPROTO_AT) &&
-		       sk->sk_type != SOCK_SEQPACKET))
-		goto out;
 	switch (sock->state) {
 	case SS_UNCONNECTED:
 		/* Normal case, a fresh connect */
@@ -883,12 +890,23 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
 	dbfs_atomic_inc(&cnt.num_connect_req);
 	cf_sk->layer.receive = caif_sktrecv_cb;
 	err = caif_connect_client(&cf_sk->conn_req,
-				&cf_sk->layer);
+				&cf_sk->layer, &ifindex, &headroom, &tailroom);
 	if (err < 0) {
 		cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
 		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
 		goto out;
 	}
+	dev = dev_get_by_index(sock_net(sk), ifindex);
+	cf_sk->headroom = LL_RESERVED_SPACE_EXTRA(dev, headroom);
+	cf_sk->tailroom = tailroom;
+	cf_sk->maxframe = dev->mtu - (headroom + tailroom);
+	dev_put(dev);
+	if (cf_sk->maxframe < 1) {
+		pr_warning("CAIF: %s(): CAIF Interface MTU too small (%d)\n",
+			__func__, dev->mtu);
+		err = -ENODEV;
+		goto out;
+	}
 
 	err = -EINPROGRESS;
 wait_connect:
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index cff2dcb9efe4..1c29189b344d 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
+#include <linux/netdevice.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfpkt.h>
 #include <net/caif/cfcnfg.h>
@@ -42,6 +43,15 @@ struct cfcnfg_phyinfo {
 
 	/* Information about the physical device */
 	struct dev_info dev_info;
+
+	/* Interface index */
+	int ifindex;
+
+	/* Use Start of frame extension */
+	bool use_stx;
+
+	/* Use Start of frame checksum */
+	bool use_fcs;
 };
 
 struct cfcnfg {
@@ -249,9 +259,20 @@ static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
 {
 }
 
+int protohead[CFCTRL_SRV_MASK] = {
+	[CFCTRL_SRV_VEI] = 4,
+	[CFCTRL_SRV_DATAGRAM] = 7,
+	[CFCTRL_SRV_UTIL] = 4,
+	[CFCTRL_SRV_RFM] = 3,
+	[CFCTRL_SRV_DBG] = 3,
+};
+
 int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 				struct cfctrl_link_param *param,
-				struct cflayer *adap_layer)
+				struct cflayer *adap_layer,
+				int *ifindex,
+				int *proto_head,
+				int *proto_tail)
 {
 	struct cflayer *frml;
 	if (adap_layer == NULL) {
@@ -277,6 +298,14 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 		     param->phyid);
 	caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
 		     param->phyid);
+
+	*ifindex = cnfg->phy_layers[param->phyid].ifindex;
+	*proto_head =
+		protohead[param->linktype]+
+		(cnfg->phy_layers[param->phyid].use_stx ? 1 : 0);
+
+	*proto_tail = 2;
+
 	/* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
 	cfctrl_enum_req(cnfg->ctrl, param->phyid);
 	return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
@@ -298,6 +327,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	struct cfcnfg *cnfg = container_obj(layer);
 	struct cflayer *servicel = NULL;
 	struct cfcnfg_phyinfo *phyinfo;
+	struct net_device *netdev;
+
 	if (adapt_layer == NULL) {
 		pr_debug("CAIF: %s(): link setup response "
 				"but no client exist, send linkdown back\n",
@@ -329,8 +360,9 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
+		netdev = phyinfo->dev_info.dev;
 		servicel = cfrfml_create(channel_id, &phyinfo->dev_info,
-						RFM_FRAGMENT_SIZE);
+						netdev->mtu);
 		break;
 	case CFCTRL_SRV_UTIL:
 		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
@@ -361,8 +393,8 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 
 void
 cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
-		     void *dev, struct cflayer *phy_layer, u16 *phyid,
-		     enum cfcnfg_phy_preference pref,
+		     struct net_device *dev, struct cflayer *phy_layer,
+		     u16 *phyid, enum cfcnfg_phy_preference pref,
 		     bool fcs, bool stx)
 {
 	struct cflayer *frml;
@@ -416,6 +448,10 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 	cnfg->phy_layers[*phyid].dev_info.dev = dev;
 	cnfg->phy_layers[*phyid].phy_layer = phy_layer;
 	cnfg->phy_layers[*phyid].phy_ref_count = 0;
+	cnfg->phy_layers[*phyid].ifindex = dev->ifindex;
+	cnfg->phy_layers[*phyid].use_stx = stx;
+	cnfg->phy_layers[*phyid].use_fcs = fcs;
+
 	phy_layer->type = phy_type;
 	frml = cffrml_create(*phyid, fcs);
 	if (!frml) {
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 107c4b2a311e..563145fdc4c3 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -19,7 +19,7 @@
 #ifdef CAIF_NO_LOOP
 static int handle_loop(struct cfctrl *ctrl,
 			      int cmd, struct cfpkt *pkt){
-	return CAIF_FAILURE;
+	return -1;
 }
 #else
 static int handle_loop(struct cfctrl *ctrl,
@@ -395,7 +395,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 	cmd = cmdrsp & CFCTRL_CMD_MASK;
 	if (cmd != CFCTRL_CMD_LINK_ERR
 	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
-		if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE)
+		if (handle_loop(cfctrl, cmd, pkt) != 0)
 			cmdrsp |= CFCTRL_ERR_BIT;
 	}
 
@@ -647,6 +647,6 @@ found:
 	default:
 		break;
 	}
-	return CAIF_SUCCESS;
+	return 0;
 }
 #endif
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index 32d9f0dc8463..ed9d53aff280 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -17,6 +17,7 @@
 #define DGM_FLOW_OFF 0x81
 #define DGM_FLOW_ON  0x80
 #define DGM_CTRL_PKT_SIZE 1
+#define DGM_MTU 1500
 
 static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
 static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
@@ -89,6 +90,10 @@ static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
+	/* STE Modem cannot handle more than 1500 bytes datagrams */
+	if (cfpkt_getlen(pkt) > DGM_MTU)
+		return -EMSGSIZE;
+
 	cfpkt_add_head(pkt, &zero, 4);
 
 	/* Add info for MUX-layer to route the packet out. */
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index 318b0f4b416e..01f238ff2346 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -9,8 +9,8 @@
 #include <linux/hardirq.h>
 #include <net/caif/cfpkt.h>
 
-#define PKT_PREFIX CAIF_NEEDED_HEADROOM
-#define PKT_POSTFIX CAIF_NEEDED_TAILROOM
+#define PKT_PREFIX  16
+#define PKT_POSTFIX 2
 #define PKT_LEN_WHEN_EXTENDING 128
 #define PKT_ERROR(pkt, errmsg) do {	   \
     cfpkt_priv(pkt)->erronous = true;	   \
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 965c5baace40..a11fbd68a13d 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -14,7 +14,8 @@
 #define container_obj(layr) ((struct cfserl *) layr)
 
 #define CFSERL_STX 0x02
-#define CAIF_MINIUM_PACKET_SIZE 4
+#define SERIAL_MINIUM_PACKET_SIZE 4
+#define SERIAL_MAX_FRAMESIZE 4096
 struct cfserl {
 	struct cflayer layer;
 	struct cfpkt *incomplete_frm;
@@ -119,8 +120,8 @@ static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
 		/*
 		 * Frame error handling
 		 */
-		if (expectlen < CAIF_MINIUM_PACKET_SIZE
-		    || expectlen > CAIF_MAX_FRAMESIZE) {
+		if (expectlen < SERIAL_MINIUM_PACKET_SIZE
+		    || expectlen > SERIAL_MAX_FRAMESIZE) {
 			if (!layr->usestx) {
 				if (pkt != NULL)
 					cfpkt_destroy(pkt);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 4c9f147c38af..f40939a91211 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -162,7 +162,6 @@ void cfservl_destroy(struct cflayer *layer)
 void cfsrvl_release(struct kref *kref)
 {
 	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
-	pr_info("CAIF: %s(): enter\n", __func__);
 	kfree(service);
 }
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index ce525cac9064..02795aff57a4 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -90,12 +90,6 @@ static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
 	if (!cfsrvl_ready(service, &ret))
 		return ret;
 
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_err("CAIF: %s(): packet too large size=%d\n",
-			__func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
-	}
-
 	cfpkt_add_head(pkt, &zero, 1);
 	/* Add info for MUX-layer to route the packet out. */
 	info = cfpkt_info(pkt);
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 637cb0eee13c..77cc09faac9a 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -84,11 +84,6 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
 		return ret;
 	caif_assert(layr->dn != NULL);
 	caif_assert(layr->dn->transmit != NULL);
-	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_warning("CAIF: %s(): Packet too large - size=%d\n",
-			   __func__, cfpkt_getlen(pkt));
-		return -EOVERFLOW;
-	}
 
 	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
 		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 610966abe2dc..4293e190ec53 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -23,7 +23,7 @@
 #include <net/caif/caif_dev.h>
 
 /* GPRS PDP connection has MTU to 1500 */
-#define SIZE_MTU 1500
+#define GPRS_PDP_MTU 1500
 /* 5 sec. connect timeout */
 #define CONNECT_TIMEOUT (5 * HZ)
 #define CAIF_NET_DEFAULT_QUEUE_LEN 500
@@ -232,6 +232,8 @@ static int chnl_net_open(struct net_device *dev)
 {
 	struct chnl_net *priv = NULL;
 	int result = -1;
+	int llifindex, headroom, tailroom, mtu;
+	struct net_device *lldev;
 	ASSERT_RTNL();
 	priv = netdev_priv(dev);
 	if (!priv) {
@@ -241,41 +243,88 @@ static int chnl_net_open(struct net_device *dev)
 
 	if (priv->state != CAIF_CONNECTING) {
 		priv->state = CAIF_CONNECTING;
-		result = caif_connect_client(&priv->conn_req, &priv->chnl);
+		result = caif_connect_client(&priv->conn_req, &priv->chnl,
+					&llifindex, &headroom, &tailroom);
 		if (result != 0) {
-				priv->state = CAIF_DISCONNECTED;
 				pr_debug("CAIF: %s(): err: "
 					"Unable to register and open device,"
 					" Err:%d\n",
 					__func__,
 					result);
-				return result;
+				goto error;
+		}
+
+		lldev = dev_get_by_index(dev_net(dev), llifindex);
+
+		if (lldev == NULL) {
+			pr_debug("CAIF: %s(): no interface?\n", __func__);
+			result = -ENODEV;
+			goto error;
+		}
+
+		dev->needed_tailroom = tailroom + lldev->needed_tailroom;
+		dev->hard_header_len = headroom + lldev->hard_header_len +
+			lldev->needed_tailroom;
+
+		/*
+		 * MTU, head-room etc is not know before we have a
+		 * CAIF link layer device available. MTU calculation may
+		 * override initial RTNL configuration.
+		 * MTU is minimum of current mtu, link layer mtu pluss
+		 * CAIF head and tail, and PDP GPRS contexts max MTU.
+		 */
+		mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom));
+		mtu = min_t(int, GPRS_PDP_MTU, mtu);
+		dev_set_mtu(dev, mtu);
+		dev_put(lldev);
+
+		if (mtu < 100) {
+			pr_warning("CAIF: %s(): "
+				"CAIF Interface MTU too small (%d)\n",
+				__func__, mtu);
+			result = -ENODEV;
+			goto error;
 		}
 	}
 
+	rtnl_unlock();  /* Release RTNL lock during connect wait */
+
 	result = wait_event_interruptible_timeout(priv->netmgmt_wq,
 						priv->state != CAIF_CONNECTING,
 						CONNECT_TIMEOUT);
 
+	rtnl_lock();
+
 	if (result == -ERESTARTSYS) {
 		pr_debug("CAIF: %s(): wait_event_interruptible"
 			 " woken by a signal\n", __func__);
-		return -ERESTARTSYS;
+		result = -ERESTARTSYS;
+		goto error;
 	}
+
 	if (result == 0) {
 		pr_debug("CAIF: %s(): connect timeout\n", __func__);
 		caif_disconnect_client(&priv->chnl);
 		priv->state = CAIF_DISCONNECTED;
 		pr_debug("CAIF: %s(): state disconnected\n", __func__);
-		return -ETIMEDOUT;
+		result = -ETIMEDOUT;
+		goto error;
 	}
 
 	if (priv->state != CAIF_CONNECTED) {
 		pr_debug("CAIF: %s(): connect failed\n", __func__);
-		return -ECONNREFUSED;
+		result = -ECONNREFUSED;
+		goto error;
 	}
 	pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
 	return 0;
+
+error:
+	caif_disconnect_client(&priv->chnl);
+	priv->state = CAIF_DISCONNECTED;
+	pr_debug("CAIF: %s(): state disconnected\n", __func__);
+	return result;
+
 }
 
 static int chnl_net_stop(struct net_device *dev)
@@ -321,9 +370,7 @@ static void ipcaif_net_setup(struct net_device *dev)
 	dev->destructor = free_netdev;
 	dev->flags |= IFF_NOARP;
 	dev->flags |= IFF_POINTOPOINT;
-	dev->needed_headroom = CAIF_NEEDED_HEADROOM;
-	dev->needed_tailroom = CAIF_NEEDED_TAILROOM;
-	dev->mtu = SIZE_MTU;
+	dev->mtu = GPRS_PDP_MTU;
 	dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
 
 	priv = netdev_priv(dev);
-- 
cgit v1.2.3-58-ga151


From 69ad78208ecf4c392f3d323ed050423847c24104 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Thu, 17 Jun 2010 06:55:41 +0000
Subject: caif: Add debug connection type for CAIF.

Added new CAIF protocol type CAIFPROTO_DEBUG for accessing
CAIF debug on the ST Ericsson modems.

There are two debug servers on the modem, one for radio related
debug (CAIF_RADIO_DEBUG_SERVICE) and the other for
communication/application related debug (CAIF_COM_DEBUG_SERVICE).

The debug connection can contain trace debug printouts or
interactive debug used for debugging and test.

Debug connections can be of type STREAM or SEQPACKET.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/caif/caif_socket.h | 34 ++++++++++++++++++++++++++++++++++
 net/caif/caif_config_util.c      |  5 +++++
 2 files changed, 39 insertions(+)

(limited to 'net')

diff --git a/include/linux/caif/caif_socket.h b/include/linux/caif/caif_socket.h
index 2a61eb1beb85..d9cb19b7cff7 100644
--- a/include/linux/caif/caif_socket.h
+++ b/include/linux/caif/caif_socket.h
@@ -62,6 +62,7 @@ enum caif_channel_priority {
  * @CAIFPROTO_DATAGRAM_LOOP:	Datagram loopback channel, used for testing.
  * @CAIFPROTO_UTIL:		Utility (Psock) channel.
  * @CAIFPROTO_RFM:		Remote File Manager
+ * @CAIFPROTO_DEBUG:		Debug link
  *
  * This enum defines the CAIF Channel type to be used. This defines
  * the service to connect to on the modem.
@@ -72,6 +73,7 @@ enum caif_protocol_type {
 	CAIFPROTO_DATAGRAM_LOOP,
 	CAIFPROTO_UTIL,
 	CAIFPROTO_RFM,
+	CAIFPROTO_DEBUG,
 	_CAIFPROTO_MAX
 };
 #define	CAIFPROTO_MAX _CAIFPROTO_MAX
@@ -83,6 +85,28 @@ enum caif_protocol_type {
 enum caif_at_type {
 	CAIF_ATTYPE_PLAIN = 2
 };
+ /**
+ * enum caif_debug_type - Content selection for debug connection
+ * @CAIF_DEBUG_TRACE_INTERACTIVE: Connection will contain
+ *				both trace and interactive debug.
+ * @CAIF_DEBUG_TRACE:		Connection contains trace only.
+ * @CAIF_DEBUG_INTERACTIVE:	Connection to interactive debug.
+ */
+enum caif_debug_type {
+	CAIF_DEBUG_TRACE_INTERACTIVE = 0,
+	CAIF_DEBUG_TRACE,
+	CAIF_DEBUG_INTERACTIVE,
+};
+
+/**
+ * enum caif_debug_service - Debug Service Endpoint
+ * @CAIF_RADIO_DEBUG_SERVICE:	Debug service on the Radio sub-system
+ * @CAIF_APP_DEBUG_SERVICE:	Debug for the applications sub-system
+ */
+enum caif_debug_service {
+	CAIF_RADIO_DEBUG_SERVICE = 1,
+	CAIF_APP_DEBUG_SERVICE
+};
 
 /**
  * struct sockaddr_caif - the sockaddr structure for CAIF sockets.
@@ -109,6 +133,12 @@ enum caif_at_type {
  *
  * @u.rfm.volume:            Volume to mount.
  *
+ * @u.dbg:		      Applies when family = CAIFPROTO_DEBUG.
+ *
+ * @u.dbg.type:			     Type of debug connection to set up
+ *			      (caif_debug_type).
+ *
+ * @u.dbg.service:	      Service sub-system to connect (caif_debug_service
  * Description:
  * This structure holds the connect parameters used for setting up a
  * CAIF Channel. It defines the service to connect to on the modem.
@@ -130,6 +160,10 @@ struct sockaddr_caif {
 			__u32 connection_id;
 			char	  volume[16];
 		} rfm;				/* CAIFPROTO_RFM */
+		struct {
+			__u8  type;		/* type:enum caif_debug_type */
+			__u8  service;		/* service:caif_debug_service */
+		} dbg;				/* CAIFPROTO_DEBUG */
 	} u;
 };
 
diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
index 6f36580366f0..76ae68303d3a 100644
--- a/net/caif/caif_config_util.c
+++ b/net/caif/caif_config_util.c
@@ -79,6 +79,11 @@ int connect_req_to_link_param(struct cfcnfg *cnfg,
 		memcpy(l->u.utility.params, s->param.data,
 		       l->u.utility.paramlen);
 
+		break;
+	case CAIFPROTO_DEBUG:
+		l->linktype = CFCTRL_SRV_DBG;
+		l->endpoint = s->sockaddr.u.dbg.service;
+		l->chtype = s->sockaddr.u.dbg.type;
 		break;
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3-58-ga151


From 26cde9f7e2747b6d254b704594eed87ab959afa5 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 15 Jun 2010 01:52:25 +0000
Subject: udp: Fix bogus UFO packet generation

It has been reported that the new UFO software fallback path
fails under certain conditions with NFS.  I tracked the problem
down to the generation of UFO packets that are smaller than the
MTU.  The software fallback path simply discards these packets.

This patch fixes the problem by not generating such packets on
the UFO path.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9a4a6c96cb0d..041d41df1224 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -873,8 +873,10 @@ int ip_append_data(struct sock *sk,
 	    !exthdrlen)
 		csummode = CHECKSUM_PARTIAL;
 
+	skb = skb_peek_tail(&sk->sk_write_queue);
+
 	inet->cork.length += length;
-	if (((length> mtu) || !skb_queue_empty(&sk->sk_write_queue)) &&
+	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
 	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
 		err = ip_ufo_append_data(sk, getfrag, from, length, hh_len,
@@ -892,7 +894,7 @@ int ip_append_data(struct sock *sk,
 	 * adding appropriate IP header.
 	 */
 
-	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+	if (!skb)
 		goto alloc_new_skb;
 
 	while (length > 0) {
@@ -1121,7 +1123,8 @@ ssize_t	ip_append_page(struct sock *sk, struct page *page,
 		return -EINVAL;
 
 	inet->cork.length += size;
-	if ((sk->sk_protocol == IPPROTO_UDP) &&
+	if ((size + skb->len > mtu) &&
+	    (sk->sk_protocol == IPPROTO_UDP) &&
 	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
 		skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
 		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
-- 
cgit v1.2.3-58-ga151


From 26ec037f9841e49cc5c615deb8e1e73e5beab2ca Mon Sep 17 00:00:00 2001
From: Nick Chalk <nick@loadbalancer.org>
Date: Tue, 22 Jun 2010 08:07:01 +0200
Subject: IPVS: one-packet scheduling

Allow one-packet scheduling for UDP connections. When the fwmark-based or
normal virtual service is marked with '-o' or '--ops' options all
connections are created only to schedule one packet. Useful to schedule UDP
packets from same client port to different real servers. Recommended with
RR or WRR schedulers (the connections are not visible with ipvsadm -L).

Signed-off-by: Nick Chalk <nick@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ip_vs.h           |  2 ++
 net/netfilter/ipvs/ip_vs_conn.c | 10 +++++++---
 net/netfilter/ipvs/ip_vs_core.c | 20 ++++++++++++++++----
 net/netfilter/ipvs/ip_vs_ctl.c  | 10 ++++++----
 4 files changed, 31 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index dfc170362842..9708de265bb1 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -19,6 +19,7 @@
  */
 #define IP_VS_SVC_F_PERSISTENT	0x0001		/* persistent port */
 #define IP_VS_SVC_F_HASHED	0x0002		/* hashed entry */
+#define IP_VS_SVC_F_ONEPACKET	0x0004		/* one-packet scheduling */
 
 /*
  *      Destination Server Flags
@@ -85,6 +86,7 @@
 #define IP_VS_CONN_F_SEQ_MASK	0x0600		/* in/out sequence mask */
 #define IP_VS_CONN_F_NO_CPORT	0x0800		/* no client port set yet */
 #define IP_VS_CONN_F_TEMPLATE	0x1000		/* template, not connection */
+#define IP_VS_CONN_F_ONE_PACKET	0x2000		/* forward only one packet */
 
 #define IP_VS_SCHEDNAME_MAXLEN	16
 #define IP_VS_IFNAME_MAXLEN	16
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index d8f7e8ef67b4..717e6233d50f 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -158,6 +158,9 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
 	unsigned hash;
 	int ret;
 
+	if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+		return 0;
+
 	/* Hash by protocol, client address and port */
 	hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport);
 
@@ -355,8 +358,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
  */
 void ip_vs_conn_put(struct ip_vs_conn *cp)
 {
-	/* reset it expire in its timeout */
-	mod_timer(&cp->timer, jiffies+cp->timeout);
+	unsigned long t = (cp->flags & IP_VS_CONN_F_ONE_PACKET) ?
+		0 : cp->timeout;
+	mod_timer(&cp->timer, jiffies+t);
 
 	__ip_vs_conn_put(cp);
 }
@@ -649,7 +653,7 @@ static void ip_vs_conn_expire(unsigned long data)
 	/*
 	 *	unhash it if it is hashed in the conn table
 	 */
-	if (!ip_vs_conn_unhash(cp))
+	if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET))
 		goto expire_later;
 
 	/*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 1cd6e3fd058b..50907d8472a3 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -194,6 +194,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	struct ip_vs_dest *dest;
 	struct ip_vs_conn *ct;
 	__be16  dport;			/* destination port to forward */
+	__be16  flags;
 	union nf_inet_addr snet;	/* source network of the client,
 					   after masking */
 
@@ -340,6 +341,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 		dport = ports[1];
 	}
 
+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a new connection according to the template
 	 */
@@ -347,7 +352,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 			    &iph.saddr, ports[0],
 			    &iph.daddr, ports[1],
 			    &dest->addr, dport,
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL) {
 		ip_vs_conn_put(ct);
@@ -377,7 +382,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 	struct ip_vs_conn *cp = NULL;
 	struct ip_vs_iphdr iph;
 	struct ip_vs_dest *dest;
-	__be16 _ports[2], *pptr;
+	__be16 _ports[2], *pptr, flags;
 
 	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
 	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -407,6 +412,10 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 		return NULL;
 	}
 
+	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
+		 && iph.protocol == IPPROTO_UDP)?
+		IP_VS_CONN_F_ONE_PACKET : 0;
+
 	/*
 	 *    Create a connection entry.
 	 */
@@ -414,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
 			    &iph.saddr, pptr[0],
 			    &iph.daddr, pptr[1],
 			    &dest->addr, dest->port ? dest->port : pptr[1],
-			    0,
+			    flags,
 			    dest);
 	if (cp == NULL)
 		return NULL;
@@ -464,6 +473,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 	if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
 		int ret, cs;
 		struct ip_vs_conn *cp;
+		__u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
+				iph.protocol == IPPROTO_UDP)?
+				IP_VS_CONN_F_ONE_PACKET : 0;
 		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };
 
 		ip_vs_service_put(svc);
@@ -474,7 +486,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
 				    &iph.saddr, pptr[0],
 				    &iph.daddr, pptr[1],
 				    &daddr, 0,
-				    IP_VS_CONN_F_BYPASS,
+				    IP_VS_CONN_F_BYPASS | flags,
 				    NULL);
 		if (cp == NULL)
 			return NF_DROP;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 36dc1d88c2fa..0f0c079c422a 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1864,14 +1864,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
 					   svc->scheduler->name);
 			else
 #endif
-				seq_printf(seq, "%s  %08X:%04X %s ",
+				seq_printf(seq, "%s  %08X:%04X %s %s ",
 					   ip_vs_proto_name(svc->protocol),
 					   ntohl(svc->addr.ip),
 					   ntohs(svc->port),
-					   svc->scheduler->name);
+					   svc->scheduler->name,
+					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 		} else {
-			seq_printf(seq, "FWM  %08X %s ",
-				   svc->fwmark, svc->scheduler->name);
+			seq_printf(seq, "FWM  %08X %s %s",
+				   svc->fwmark, svc->scheduler->name,
+				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
 		}
 
 		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
-- 
cgit v1.2.3-58-ga151


From 600069daf792f497ae1e1da74e2fff6ac5de6c47 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 22 Jun 2010 08:13:31 +0200
Subject: netfilter: xt_IDLETIMER needs kdev_t.h

Add header file to fix build error:
net/netfilter/xt_IDLETIMER.c:276: error: implicit declaration of function 'MKDEV'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_IDLETIMER.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index e11090a0675c..be1f22e13545 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -36,6 +36,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_IDLETIMER.h>
+#include <linux/kdev_t.h>
 #include <linux/kobject.h>
 #include <linux/workqueue.h>
 #include <linux/sysfs.h>
-- 
cgit v1.2.3-58-ga151


From fe6fb552858f686f39e33d7b0a33fe56dacea0bf Mon Sep 17 00:00:00 2001
From: Arnd Hannemann <hannemann@nets.rwth-aachen.de>
Date: Tue, 22 Jun 2010 08:22:21 +0200
Subject: netfilter: fix simple typo in KConfig for netfiltert xt_TEE

Destination was spelled wrong in KConfig.

Signed-off-by: Arnd Hannemann <hannemann@nets.rwth-aachen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 413ed24a968a..21be53598987 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -515,7 +515,7 @@ config NETFILTER_XT_TARGET_RATEEST
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config NETFILTER_XT_TARGET_TEE
-	tristate '"TEE" - packet cloning to alternate destiantion'
+	tristate '"TEE" - packet cloning to alternate destination'
 	depends on NETFILTER_ADVANCED
 	depends on (IPV6 || IPV6=n)
 	depends on !NF_CONNTRACK || NF_CONNTRACK
-- 
cgit v1.2.3-58-ga151


From 6afff0caa721211e8c04bdc7627ee3bff95bcb95 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Wed, 16 Jun 2010 14:18:12 +0000
Subject: net: consolidate netif_needs_gso() checks

netif_needs_gso() is checked twice in the TX path once,
before submitting the skb to the qdisc and once after
it is dequeued from the qdisc just before calling
ndo_hard_start().  This opens a window for a user to
change the gso/tso or tx checksum settings that can
cause netif_needs_gso to be true in one check and false
in the other.

Specifically, changing TX checksum setting may cause
the warning in skb_gso_segment() to be triggered if
the checksum is calculated earlier.

This consolidates the netif_needs_gso() calls so that
the stack only checks if gso is needed in
dev_hard_start_xmit().

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 68 +++++++++++++++++++++++++++-------------------------------
 1 file changed, 32 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 5902426ef585..7f390b52caab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1895,6 +1895,22 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 		skb_orphan(skb);
 }
 
+/*
+ * Returns true if either:
+ *	1. skb has frag_list and the device doesn't support FRAGLIST, or
+ *	2. skb is fragmented and the device does not support SG, or if
+ *	   at least one of fragments is in highmem and device does not
+ *	   support DMA from it.
+ */
+static inline int skb_needs_linearize(struct sk_buff *skb,
+				      struct net_device *dev)
+{
+	return skb_is_nonlinear(skb) &&
+	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
+	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+					      illegal_highdma(dev, skb))));
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -1919,6 +1935,22 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				goto out_kfree_skb;
 			if (skb->next)
 				goto gso;
+		} else {
+			if (skb_needs_linearize(skb, dev) &&
+			    __skb_linearize(skb))
+				goto out_kfree_skb;
+
+			/* If packet is not checksummed and device does not
+			 * support checksumming for this protocol, complete
+			 * checksumming here.
+			 */
+			if (skb->ip_summed == CHECKSUM_PARTIAL) {
+				skb_set_transport_header(skb, skb->csum_start -
+					      skb_headroom(skb));
+				if (!dev_can_checksum(dev, skb) &&
+				     skb_checksum_help(skb))
+					goto out_kfree_skb;
+			}
 		}
 
 		rc = ops->ndo_start_xmit(skb, dev);
@@ -2089,22 +2121,6 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
-/*
- * Returns true if either:
- *	1. skb has frag_list and the device doesn't support FRAGLIST, or
- *	2. skb is fragmented and the device does not support SG, or if
- *	   at least one of fragments is in highmem and device does not
- *	   support DMA from it.
- */
-static inline int skb_needs_linearize(struct sk_buff *skb,
-				      struct net_device *dev)
-{
-	return skb_is_nonlinear(skb) &&
-	       ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-	        (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
-					      illegal_highdma(dev, skb))));
-}
-
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -2137,25 +2153,6 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
-	/* GSO will handle the following emulations directly. */
-	if (netif_needs_gso(dev, skb))
-		goto gso;
-
-	/* Convert a paged skb to linear, if required */
-	if (skb_needs_linearize(skb, dev) && __skb_linearize(skb))
-		goto out_kfree_skb;
-
-	/* If packet is not checksummed and device does not support
-	 * checksumming for this protocol, complete checksumming here.
-	 */
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		skb_set_transport_header(skb, skb->csum_start -
-					      skb_headroom(skb));
-		if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
-			goto out_kfree_skb;
-	}
-
-gso:
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
@@ -2214,7 +2211,6 @@ gso:
 	rc = -ENETDOWN;
 	rcu_read_unlock_bh();
 
-out_kfree_skb:
 	kfree_skb(skb);
 	return rc;
 out:
-- 
cgit v1.2.3-58-ga151


From 406818ff347cbbdae2fb21fafd1939d00cf479c5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 23 Jun 2010 13:00:48 -0700
Subject: bridge: 64bit rx/tx counters

Use u64_stats_sync infrastructure to provide 64bit rx/tx
counters even on 32bit hosts.

It is safe to use a single u64_stats_sync for rx and tx,
because BH is disabled on both, and we use per_cpu data.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 24 +++++++++++++++---------
 net/bridge/br_input.c   |  2 ++
 net/bridge/br_private.h | 10 ++++++----
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6f3a9279be30..edf639e96281 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -38,8 +38,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 #endif
 
+	u64_stats_update_begin(&brstats->syncp);
 	brstats->tx_packets++;
 	brstats->tx_bytes += skb->len;
+	u64_stats_update_end(&brstats->syncp);
 
 	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
@@ -96,21 +98,25 @@ static int br_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats *br_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_device_stats *stats = &dev->stats;
-	struct br_cpu_netstats sum = { 0 };
+	struct rtnl_link_stats64 *stats = &dev->stats64;
+	struct br_cpu_netstats tmp, sum = { 0 };
 	unsigned int cpu;
 
 	for_each_possible_cpu(cpu) {
+		unsigned int start;
 		const struct br_cpu_netstats *bstats
 			= per_cpu_ptr(br->stats, cpu);
-
-		sum.tx_bytes   += bstats->tx_bytes;
-		sum.tx_packets += bstats->tx_packets;
-		sum.rx_bytes   += bstats->rx_bytes;
-		sum.rx_packets += bstats->rx_packets;
+		do {
+			start = u64_stats_fetch_begin(&bstats->syncp);
+			memcpy(&tmp, bstats, sizeof(tmp));
+		} while (u64_stats_fetch_retry(&bstats->syncp, start));
+		sum.tx_bytes   += tmp.tx_bytes;
+		sum.tx_packets += tmp.tx_packets;
+		sum.rx_bytes   += tmp.rx_bytes;
+		sum.rx_packets += tmp.rx_packets;
 	}
 
 	stats->tx_bytes   = sum.tx_bytes;
@@ -300,7 +306,7 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_open		 = br_dev_open,
 	.ndo_stop		 = br_dev_stop,
 	.ndo_start_xmit		 = br_dev_xmit,
-	.ndo_get_stats		 = br_get_stats,
+	.ndo_get_stats64	 = br_get_stats64,
 	.ndo_set_mac_address	 = br_set_mac_address,
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f076c9d79d5e..5fc1c5b1c360 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -27,8 +27,10 @@ static int br_pass_frame_up(struct sk_buff *skb)
 	struct net_bridge *br = netdev_priv(brdev);
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
+	u64_stats_update_begin(&brstats->syncp);
 	brstats->rx_packets++;
 	brstats->rx_bytes += skb->len;
+	u64_stats_update_end(&brstats->syncp);
 
 	indev = skb->dev;
 	skb->dev = brdev;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 07cf57b373fe..3f0678fd1fd0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -16,6 +16,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_bridge.h>
 #include <linux/netpoll.h>
+#include <linux/u64_stats_sync.h>
 #include <net/route.h>
 
 #define BR_HASH_BITS 8
@@ -156,10 +157,11 @@ struct net_bridge_port
 #define br_port_exists(dev) (dev->priv_flags & IFF_BRIDGE_PORT)
 
 struct br_cpu_netstats {
-	unsigned long	rx_packets;
-	unsigned long	rx_bytes;
-	unsigned long	tx_packets;
-	unsigned long	tx_bytes;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
 };
 
 struct net_bridge
-- 
cgit v1.2.3-58-ga151


From 7b2ff18ee7b0ec4bc3162f821e221781aaca48bd Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 15 Jun 2010 01:07:31 +0000
Subject: net - IP_NODEFRAG option for IPv4 socket

this patch is implementing IP_NODEFRAG option for IPv4 socket.
The reason is, there's no other way to send out the packet with user
customized header of the reassembly part.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h                  | 1 +
 include/net/inet_sock.h             | 3 ++-
 net/ipv4/af_inet.c                  | 2 ++
 net/ipv4/ip_sockglue.c              | 9 ++++++++-
 net/ipv4/netfilter/nf_defrag_ipv4.c | 5 +++++
 5 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/in.h b/include/linux/in.h
index 583c76f9c30f..41d88a4689af 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -85,6 +85,7 @@ struct in_addr {
 #define IP_RECVORIGDSTADDR   IP_ORIGDSTADDR
 
 #define IP_MINTTL       21
+#define IP_NODEFRAG     22
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1653de515cee..1989cfd7405f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -137,7 +137,8 @@ struct inet_sock {
 				hdrincl:1,
 				mc_loop:1,
 				transparent:1,
-				mc_all:1;
+				mc_all:1,
+				nodefrag:1;
 	int			mc_index;
 	__be32			mc_addr;
 	struct ip_mc_socklist	*mc_list;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d99e7e020189..b4c0969137cb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -355,6 +355,8 @@ lookup_protocol:
 	inet = inet_sk(sk);
 	inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
 
+	inet->nodefrag = 0;
+
 	if (SOCK_RAW == sock->type) {
 		inet->inet_num = protocol;
 		if (IPPROTO_RAW == protocol)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 47fff528ff39..6c40a8c46e79 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -465,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			     (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
 			     (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
 			     (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
-			     (1<<IP_MINTTL))) ||
+			     (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
 	    optname == IP_MULTICAST_TTL ||
 	    optname == IP_MULTICAST_ALL ||
 	    optname == IP_MULTICAST_LOOP ||
@@ -588,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		}
 		inet->hdrincl = val ? 1 : 0;
 		break;
+	case IP_NODEFRAG:
+		if (sk->sk_type != SOCK_RAW) {
+			err = -ENOPROTOOPT;
+			break;
+		}
+		inet->nodefrag = val ? 1 : 0;
+		break;
 	case IP_MTU_DISCOVER:
 		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
 			goto e_inval;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index cb763ae9ed90..eab8de32f200 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -66,6 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 					  const struct net_device *out,
 					  int (*okfn)(struct sk_buff *))
 {
+	struct inet_sock *inet = inet_sk(skb->sk);
+
+	if (inet && inet->nodefrag)
+		return NF_ACCEPT;
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 	/* Previously seen (loopback)?  Ignore.  Do this before
-- 
cgit v1.2.3-58-ga151


From b1312c89f0016f778cac4f1536f1434e132f8713 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Thu, 24 Jun 2010 14:35:00 -0700
Subject: xfrm: check bundle policy existance before dereferencing it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the bundle validation code to not assume having a valid policy.
When we have multiple transformations for a xfrm policy, the bundle
instance will be a chain of bundles with only the first one having
the policy reference. When policy_genid is bumped it will expire the
first bundle in the chain which is equivalent of expiring the whole
chain.

Reported-bisected-and-tested-by: Justin P. Mattock <justinmattock@gmail.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4bf27d901333..af1c173be4ad 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2300,7 +2300,8 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 			return 0;
 		if (xdst->xfrm_genid != dst->xfrm->genid)
 			return 0;
-		if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
+		if (xdst->num_pols > 0 &&
+		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
 			return 0;
 
 		if (strict && fl &&
-- 
cgit v1.2.3-58-ga151


From deb0d7c740a008a4e26806317497549b0e8907eb Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Thu, 24 Jun 2010 20:33:04 -0700
Subject: net: fix "netpoll: Allow netpoll_setup/cleanup recursion"

Remove rtnl_unlock() which had no corresponding rtnl_lock().

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 560297ee55b4..ca6dc31843ea 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -748,7 +748,6 @@ int __netpoll_setup(struct netpoll *np)
 
 	/* last thing to do is link it to the net device structure */
 	rcu_assign_pointer(ndev->npinfo, npinfo);
-	rtnl_unlock();
 
 	return 0;
 
-- 
cgit v1.2.3-58-ga151


From 565b7b2d2e632b5792879c0c9cccdd9eecd31195 Mon Sep 17 00:00:00 2001
From: Konstantin Khorenko <khorenko@openvz.org>
Date: Thu, 24 Jun 2010 21:54:58 -0700
Subject: tcp: do not send reset to already closed sockets

i've found that tcp_close() can be called for an already closed
socket, but still sends reset in this case (tcp_send_active_reset())
which seems to be incorrect.  Moreover, a packet with reset is sent
with different source port as original port number has been already
cleared on socket.  Besides that incrementing stat counter for
LINUX_MIB_TCPABORTONCLOSE also does not look correct in this case.

Initially this issue was found on 2.6.18-x RHEL5 kernel, but the same
seems to be true for the current mainstream kernel (checked on
2.6.35-rc3).  Please, correct me if i missed something.

How that happens:

1) the server receives a packet for socket in TCP_CLOSE_WAIT state
   that triggers a tcp_reset():

Call Trace:
 <IRQ>  [<ffffffff8025b9b9>] tcp_reset+0x12f/0x1e8
 [<ffffffff80046125>] tcp_rcv_state_process+0x1c0/0xa08
 [<ffffffff8003eb22>] tcp_v4_do_rcv+0x310/0x37a
 [<ffffffff80028bea>] tcp_v4_rcv+0x74d/0xb43
 [<ffffffff8024ef4c>] ip_local_deliver_finish+0x0/0x259
 [<ffffffff80037131>] ip_local_deliver+0x200/0x2f4
 [<ffffffff8003843c>] ip_rcv+0x64c/0x69f
 [<ffffffff80021d89>] netif_receive_skb+0x4c4/0x4fa
 [<ffffffff80032eca>] process_backlog+0x90/0xec
 [<ffffffff8000cc50>] net_rx_action+0xbb/0x1f1
 [<ffffffff80012d3a>] __do_softirq+0xf5/0x1ce
 [<ffffffff8001147a>] handle_IRQ_event+0x56/0xb0
 [<ffffffff8006334c>] call_softirq+0x1c/0x28
 [<ffffffff80070476>] do_softirq+0x2c/0x85
 [<ffffffff80070441>] do_IRQ+0x149/0x152
 [<ffffffff80062665>] ret_from_intr+0x0/0xa
 <EOI>  [<ffffffff80008a2e>] __handle_mm_fault+0x6cd/0x1303
 [<ffffffff80008903>] __handle_mm_fault+0x5a2/0x1303
 [<ffffffff80033a9d>] cache_free_debugcheck+0x21f/0x22e
 [<ffffffff8006a263>] do_page_fault+0x49a/0x7dc
 [<ffffffff80066487>] thread_return+0x89/0x174
 [<ffffffff800c5aee>] audit_syscall_exit+0x341/0x35c
 [<ffffffff80062e39>] error_exit+0x0/0x84

tcp_rcv_state_process()
...  // (sk_state == TCP_CLOSE_WAIT here)
...
        /* step 2: check RST bit */
        if(th->rst) {
                tcp_reset(sk);
                goto discard;
        }
...
---------------------------------
tcp_rcv_state_process
 tcp_reset
  tcp_done
   tcp_set_state(sk, TCP_CLOSE);
     inet_put_port
      __inet_put_port
       inet_sk(sk)->num = 0;

   sk->sk_shutdown = SHUTDOWN_MASK;

2) After that the process (socket owner) tries to write something to
   that socket and "inet_autobind" sets a _new_ (which differs from
   the original!) port number for the socket:

 Call Trace:
  [<ffffffff80255a12>] inet_bind_hash+0x33/0x5f
  [<ffffffff80257180>] inet_csk_get_port+0x216/0x268
  [<ffffffff8026bcc9>] inet_autobind+0x22/0x8f
  [<ffffffff80049140>] inet_sendmsg+0x27/0x57
  [<ffffffff8003a9d9>] do_sock_write+0xae/0xea
  [<ffffffff80226ac7>] sock_writev+0xdc/0xf6
  [<ffffffff800680c7>] _spin_lock_irqsave+0x9/0xe
  [<ffffffff8001fb49>] __pollwait+0x0/0xdd
  [<ffffffff8008d533>] default_wake_function+0x0/0xe
  [<ffffffff800a4f10>] autoremove_wake_function+0x0/0x2e
  [<ffffffff800f0b49>] do_readv_writev+0x163/0x274
  [<ffffffff80066538>] thread_return+0x13a/0x174
  [<ffffffff800145d8>] tcp_poll+0x0/0x1c9
  [<ffffffff800c56d3>] audit_syscall_entry+0x180/0x1b3
  [<ffffffff800f0dd0>] sys_writev+0x49/0xe4
  [<ffffffff800622dd>] tracesys+0xd5/0xe0

3) sendmsg fails at last with -EPIPE (=> 'write' returns -EPIPE in userspace):

F: tcp_sendmsg1 -EPIPE: sk=ffff81000bda00d0, sport=49847, old_state=7, new_state=7, sk_err=0, sk_shutdown=3

Call Trace:
 [<ffffffff80027557>] tcp_sendmsg+0xcb/0xe87
 [<ffffffff80033300>] release_sock+0x10/0xae
 [<ffffffff8016f20f>] vgacon_cursor+0x0/0x1a7
 [<ffffffff8026bd32>] inet_autobind+0x8b/0x8f
 [<ffffffff8003a9d9>] do_sock_write+0xae/0xea
 [<ffffffff80226ac7>] sock_writev+0xdc/0xf6
 [<ffffffff800680c7>] _spin_lock_irqsave+0x9/0xe
 [<ffffffff8001fb49>] __pollwait+0x0/0xdd
 [<ffffffff8008d533>] default_wake_function+0x0/0xe
 [<ffffffff800a4f10>] autoremove_wake_function+0x0/0x2e
 [<ffffffff800f0b49>] do_readv_writev+0x163/0x274
 [<ffffffff80066538>] thread_return+0x13a/0x174
 [<ffffffff800145d8>] tcp_poll+0x0/0x1c9
 [<ffffffff800c56d3>] audit_syscall_entry+0x180/0x1b3
 [<ffffffff800f0dd0>] sys_writev+0x49/0xe4
 [<ffffffff800622dd>] tracesys+0xd5/0xe0

tcp_sendmsg()
...
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
                int old_state = sk->sk_state;
                if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) {
if (f_d && (err == -EPIPE)) {
        printk("F: tcp_sendmsg1 -EPIPE: sk=%p, sport=%u, old_state=%d, new_state=%d, "
                "sk_err=%d, sk_shutdown=%d\n",
                sk, ntohs(inet_sk(sk)->sport), old_state, sk->sk_state,
                sk->sk_err, sk->sk_shutdown);
        dump_stack();
}
                        goto out_err;
                }
        }
...

4) Then the process (socket owner) understands that it's time to close
   that socket and does that (and thus triggers sending reset packet):

Call Trace:
...
 [<ffffffff80032077>] dev_queue_xmit+0x343/0x3d6
 [<ffffffff80034698>] ip_output+0x351/0x384
 [<ffffffff80251ae9>] dst_output+0x0/0xe
 [<ffffffff80036ec6>] ip_queue_xmit+0x567/0x5d2
 [<ffffffff80095700>] vprintk+0x21/0x33
 [<ffffffff800070f0>] check_poison_obj+0x2e/0x206
 [<ffffffff80013587>] poison_obj+0x36/0x45
 [<ffffffff8025dea6>] tcp_send_active_reset+0x15/0x14d
 [<ffffffff80023481>] dbg_redzone1+0x1c/0x25
 [<ffffffff8025dea6>] tcp_send_active_reset+0x15/0x14d
 [<ffffffff8000ca94>] cache_alloc_debugcheck_after+0x189/0x1c8
 [<ffffffff80023405>] tcp_transmit_skb+0x764/0x786
 [<ffffffff8025df8a>] tcp_send_active_reset+0xf9/0x14d
 [<ffffffff80258ff1>] tcp_close+0x39a/0x960
 [<ffffffff8026be12>] inet_release+0x69/0x80
 [<ffffffff80059b31>] sock_release+0x4f/0xcf
 [<ffffffff80059d4c>] sock_close+0x2c/0x30
 [<ffffffff800133c9>] __fput+0xac/0x197
 [<ffffffff800252bc>] filp_close+0x59/0x61
 [<ffffffff8001eff6>] sys_close+0x85/0xc7
 [<ffffffff800622dd>] tracesys+0xd5/0xe0

So, in brief:

* a received packet for socket in TCP_CLOSE_WAIT state triggers
  tcp_reset() which clears inet_sk(sk)->num and put socket into
  TCP_CLOSE state

* an attempt to write to that socket forces inet_autobind() to get a
  new port (but the write itself fails with -EPIPE)

* tcp_close() called for socket in TCP_CLOSE state sends an active
  reset via socket with newly allocated port

This adds an additional check in tcp_close() for already closed
sockets. We do not want to send anything to closed sockets.

Signed-off-by: Konstantin Khorenko <khorenko@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 779d40c3b96e..d5f84bd5a455 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1898,6 +1898,10 @@ void tcp_close(struct sock *sk, long timeout)
 
 	sk_mem_reclaim(sk);
 
+	/* If socket has been already reset (e.g. in tcp_reset()) - kill it. */
+	if (sk->sk_state == TCP_CLOSE)
+		goto adjudge_to_death;
+
 	/* As outlined in RFC 2525, section 2.17, we send a RST here because
 	 * data was lost. To witness the awful effects of the old behavior of
 	 * always doing a FIN, run an older 2.1.x kernel or 2.0.x, start a bulk
-- 
cgit v1.2.3-58-ga151


From 1a61a83ff59378a5613d8c706c4a660c353b62a8 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Fri, 18 Jun 2010 14:24:00 +0000
Subject: Bluetooth: Bring back var 'i' increment

commit ff6e2163f28a1094fb5ca5950fe2b43c3cf6bc7a accidentally added a
regression on the bnep code. Fixing it.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bluetooth/bnep/netdev.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 0faad5ce6dc4..8c100c9dae28 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -104,6 +104,8 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 				break;
 			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
 			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+
+			i++;
 		}
 		r->len = htons(skb->len - len);
 	}
-- 
cgit v1.2.3-58-ga151


From a8756201ba4189bca3ee1a6ec4e290f467ee09ab Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Fri, 25 Jun 2010 14:44:07 +0200
Subject: netfilter: xt_connbytes: Force CT accounting to be enabled

Check at rule install time that CT accounting is enabled. Force it
to be enabled if not while also emitting a warning since this is not
the default state.

This is in preparation for deprecating CONFIG_NF_CT_ACCT upon which
CONFIG_NETFILTER_XT_MATCH_CONNBYTES depended being set.

Added 2 CT accounting support functions:

nf_ct_acct_enabled() - Get CT accounting state.
nf_ct_set_acct() - Enable/disable CT accountuing.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Acked-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netfilter/nf_conntrack_acct.h | 12 ++++++++++++
 net/netfilter/xt_connbytes.c              | 10 ++++++++++
 2 files changed, 22 insertions(+)

(limited to 'net')

diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 03e218f0be43..4e9c63a20db2 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -45,6 +45,18 @@ struct nf_conn_counter *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 extern unsigned int
 seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir);
 
+/* Check if connection tracking accounting is enabled */
+static inline bool nf_ct_acct_enabled(struct net *net)
+{
+	return net->ct.sysctl_acct != 0;
+}
+
+/* Enable/disable connection tracking accounting */
+static inline void nf_ct_set_acct(struct net *net, bool enable)
+{
+	net->ct.sysctl_acct = enable;
+}
+
 extern int nf_conntrack_acct_init(struct net *net);
 extern void nf_conntrack_acct_fini(struct net *net);
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 73517835303d..5b138506690e 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -112,6 +112,16 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
+
+	/*
+	 * This filter cannot function correctly unless connection tracking
+	 * accounting is enabled, so complain in the hope that someone notices.
+	 */
+	if (!nf_ct_acct_enabled(par->net)) {
+		pr_warning("Forcing CT accounting to be enabled\n");
+		nf_ct_set_acct(par->net, true);
+	}
+
 	return ret;
 }
 
-- 
cgit v1.2.3-58-ga151


From d70a011dbbaa6335a19deb63ec3eb613f48faafd Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Fri, 25 Jun 2010 14:46:56 +0200
Subject: netfilter: complete the deprecation of CONFIG_NF_CT_ACCT

CONFIG_NF_CT_ACCT has been deprecated for awhile and
was originally scheduled for removal by 2.6.29.

Removing support for this config option also stops
this deprecation warning message in the kernel log.

[   61.669627] nf_conntrack version 0.5.0 (16384 buckets, 65536 max)
[   61.669850] CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use
[   61.669852] nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or
[   61.669853] sysctl net.netfilter.nf_conntrack_acct=1 to enable it.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
[Patrick: changed default value to 0]
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 Documentation/feature-removal-schedule.txt |  9 ---------
 Documentation/kernel-parameters.txt        |  3 +--
 net/netfilter/Kconfig                      | 22 ----------------------
 net/netfilter/nf_conntrack_acct.c          | 14 +-------------
 4 files changed, 2 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 672be0109d02..92f021aac92c 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -303,15 +303,6 @@ Who:	Johannes Berg <johannes@sipsolutions.net>
 
 ---------------------------
 
-What: CONFIG_NF_CT_ACCT
-When: 2.6.29
-Why:  Accounting can now be enabled/disabled without kernel recompilation.
-      Currently used only to set a default value for a feature that is also
-      controlled by a kernel/module/sysfs/sysctl parameter.
-Who:  Krzysztof Piotr Oledzki <ole@ans.pl>
-
----------------------------
-
 What:	sysfs ui for changing p4-clockmod parameters
 When:	September 2009
 Why:	See commits 129f8ae9b1b5be94517da76009ea956e89104ce8 and
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1808f1157f30..cee62515ffa1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1597,8 +1597,7 @@ and is between 256 and 4096 characters. It is defined in the file
 			[NETFILTER] Enable connection tracking flow accounting
 			0 to disable accounting
 			1 to enable accounting
-			Default value depends on CONFIG_NF_CT_ACCT that is
-			going to be removed in 2.6.29.
+			Default value is 0.
 
 	nfsaddrs=	[NFS]
 			See Documentation/filesystems/nfs/nfsroot.txt.
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 21be53598987..aa2f106347e4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -40,27 +40,6 @@ config NF_CONNTRACK
 
 if NF_CONNTRACK
 
-config NF_CT_ACCT
-	bool "Connection tracking flow accounting"
-	depends on NETFILTER_ADVANCED
-	help
-	  If this option is enabled, the connection tracking code will
-	  keep per-flow packet and byte counters.
-
-	  Those counters can be used for flow-based accounting or the
-	  `connbytes' match.
-
-	  Please note that currently this option only sets a default state.
-	  You may change it at boot time with nf_conntrack.acct=0/1 kernel
-	  parameter or by loading the nf_conntrack module with acct=0/1.
-
-	  You may also disable/enable it on a running system with:
-	   sysctl net.netfilter.nf_conntrack_acct=0/1
-
-	  This option will be removed in 2.6.29.
-
-	  If unsure, say `N'.
-
 config NF_CONNTRACK_MARK
 	bool  'Connection mark tracking support'
 	depends on NETFILTER_ADVANCED
@@ -630,7 +609,6 @@ config NETFILTER_XT_MATCH_CONNBYTES
 	tristate  '"connbytes" per-connection counter match support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
-	select NF_CT_ACCT
 	help
 	  This option adds a `connbytes' match, which allows you to match the
 	  number of bytes and/or packets for each direction within a connection.
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index ab81b380eae6..5178c691ecbf 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -17,13 +17,7 @@
 #include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 
-#ifdef CONFIG_NF_CT_ACCT
-#define NF_CT_ACCT_DEFAULT 1
-#else
-#define NF_CT_ACCT_DEFAULT 0
-#endif
-
-static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT;
+static int nf_ct_acct __read_mostly;
 
 module_param_named(acct, nf_ct_acct, bool, 0644);
 MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
@@ -114,12 +108,6 @@ int nf_conntrack_acct_init(struct net *net)
 	net->ct.sysctl_acct = nf_ct_acct;
 
 	if (net_eq(net, &init_net)) {
-#ifdef CONFIG_NF_CT_ACCT
-	printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n");
-		printk(KERN_WARNING "nf_conntrack.acct=1 kernel parameter, acct=1 nf_conntrack module option or\n");
-		printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n");
-#endif
-
 		ret = nf_ct_extend_register(&acct_extend);
 		if (ret < 0) {
 			printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n");
-- 
cgit v1.2.3-58-ga151


From 9f888160bdcccf0565dd2774956b8d9456e610be Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 21 Jun 2010 11:00:13 +0000
Subject: ipv6: fix NULL reference in proxy neighbor discovery

The addition of TLLAO option created a kernel OOPS regression
for the case where neighbor advertisement is being sent via
proxy path.  When using proxy, ipv6_get_ifaddr() returns NULL
causing the NULL dereference.

Change causing the bug was:
commit f7734fdf61ec6bb848e0bafc1fb8bad2c124bb50
Author: Octavian Purdila <opurdila@ixiacom.com>
Date:   Fri Oct 2 11:39:15 2009 +0000

    make TLLAO option for NA packets configurable

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ndisc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 0abdc242ddb7..2efef52fb461 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -586,6 +586,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 		src_addr = solicited_addr;
 		if (ifp->flags & IFA_F_OPTIMISTIC)
 			override = 0;
+		inc_opt |= ifp->idev->cnf.force_tllao;
 		in6_ifa_put(ifp);
 	} else {
 		if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
@@ -599,7 +600,6 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
 	icmp6h.icmp6_solicited = solicited;
 	icmp6h.icmp6_override = override;
 
-	inc_opt |= ifp->idev->cnf.force_tllao;
 	__ndisc_send(dev, neigh, daddr, src_addr,
 		     &icmp6h, solicited_addr,
 		     inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
-- 
cgit v1.2.3-58-ga151


From 01f2f3f6ef4d076c0c10a8a7b42624416d56b523 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Sat, 19 Jun 2010 17:05:36 +0000
Subject: net: optimize Berkeley Packet Filter (BPF) processing

Gcc is currenlty not in the ability to optimize the switch statement in
sk_run_filter() because of dense case labels. This patch replace the
OR'd labels with ordered sequenced case labels. The sk_chk_filter()
function is modified to patch/replace the original OPCODES in a
ordered but equivalent form. gcc is now in the ability to transform the
switch statement in sk_run_filter into a jump table of complexity O(1).

Until this patch gcc generates a sequence of conditional branches (O(n) of 567
byte .text segment size (arch x86_64):

7ff: 8b 06                 mov    (%rsi),%eax
801: 66 83 f8 35           cmp    $0x35,%ax
805: 0f 84 d0 02 00 00     je     adb <sk_run_filter+0x31d>
80b: 0f 87 07 01 00 00     ja     918 <sk_run_filter+0x15a>
811: 66 83 f8 15           cmp    $0x15,%ax
815: 0f 84 c5 02 00 00     je     ae0 <sk_run_filter+0x322>
81b: 77 73                 ja     890 <sk_run_filter+0xd2>
81d: 66 83 f8 04           cmp    $0x4,%ax
821: 0f 84 17 02 00 00     je     a3e <sk_run_filter+0x280>
827: 77 29                 ja     852 <sk_run_filter+0x94>
829: 66 83 f8 01           cmp    $0x1,%ax
[...]

With the modification the compiler translate the switch statement into
the following jump table fragment:

7ff: 66 83 3e 2c           cmpw   $0x2c,(%rsi)
803: 0f 87 1f 02 00 00     ja     a28 <sk_run_filter+0x26a>
809: 0f b7 06              movzwl (%rsi),%eax
80c: ff 24 c5 00 00 00 00  jmpq   *0x0(,%rax,8)
813: 44 89 e3              mov    %r12d,%ebx
816: e9 43 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>
81b: 41 89 dc              mov    %ebx,%r12d
81e: e9 3b 03 00 00        jmpq   b5e <sk_run_filter+0x3a0>

Furthermore, I reordered the instructions to reduce cache line misses by
order the most common instruction to the start.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  48 +++++++++++
 net/core/filter.c      | 212 +++++++++++++++++++++++++++++++++++++------------
 2 files changed, 209 insertions(+), 51 deletions(-)

(limited to 'net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 151f5d703b7e..69b43dbea6c6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -91,6 +91,54 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define         BPF_TAX         0x00
 #define         BPF_TXA         0x80
 
+enum {
+	BPF_S_RET_K = 0,
+	BPF_S_RET_A,
+	BPF_S_ALU_ADD_K,
+	BPF_S_ALU_ADD_X,
+	BPF_S_ALU_SUB_K,
+	BPF_S_ALU_SUB_X,
+	BPF_S_ALU_MUL_K,
+	BPF_S_ALU_MUL_X,
+	BPF_S_ALU_DIV_X,
+	BPF_S_ALU_AND_K,
+	BPF_S_ALU_AND_X,
+	BPF_S_ALU_OR_K,
+	BPF_S_ALU_OR_X,
+	BPF_S_ALU_LSH_K,
+	BPF_S_ALU_LSH_X,
+	BPF_S_ALU_RSH_K,
+	BPF_S_ALU_RSH_X,
+	BPF_S_ALU_NEG,
+	BPF_S_LD_W_ABS,
+	BPF_S_LD_H_ABS,
+	BPF_S_LD_B_ABS,
+	BPF_S_LD_W_LEN,
+	BPF_S_LD_W_IND,
+	BPF_S_LD_H_IND,
+	BPF_S_LD_B_IND,
+	BPF_S_LD_IMM,
+	BPF_S_LDX_W_LEN,
+	BPF_S_LDX_B_MSH,
+	BPF_S_LDX_IMM,
+	BPF_S_MISC_TAX,
+	BPF_S_MISC_TXA,
+	BPF_S_ALU_DIV_K,
+	BPF_S_LD_MEM,
+	BPF_S_LDX_MEM,
+	BPF_S_ST,
+	BPF_S_STX,
+	BPF_S_JMP_JA,
+	BPF_S_JMP_JEQ_K,
+	BPF_S_JMP_JEQ_X,
+	BPF_S_JMP_JGE_K,
+	BPF_S_JMP_JGE_X,
+	BPF_S_JMP_JGT_K,
+	BPF_S_JMP_JGT_X,
+	BPF_S_JMP_JSET_K,
+	BPF_S_JMP_JSET_X,
+};
+
 #ifndef BPF_MAXINSNS
 #define BPF_MAXINSNS 4096
 #endif
diff --git a/net/core/filter.c b/net/core/filter.c
index da69fb728d32..52b051f82a01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -128,87 +128,87 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 		fentry = &filter[pc];
 
 		switch (fentry->code) {
-		case BPF_ALU|BPF_ADD|BPF_X:
+		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
-		case BPF_ALU|BPF_ADD|BPF_K:
+		case BPF_S_ALU_ADD_K:
 			A += fentry->k;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_X:
+		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
-		case BPF_ALU|BPF_SUB|BPF_K:
+		case BPF_S_ALU_SUB_K:
 			A -= fentry->k;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_X:
+		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
-		case BPF_ALU|BPF_MUL|BPF_K:
+		case BPF_S_ALU_MUL_K:
 			A *= fentry->k;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_X:
+		case BPF_S_ALU_DIV_X:
 			if (X == 0)
 				return 0;
 			A /= X;
 			continue;
-		case BPF_ALU|BPF_DIV|BPF_K:
+		case BPF_S_ALU_DIV_K:
 			A /= fentry->k;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_X:
+		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
-		case BPF_ALU|BPF_AND|BPF_K:
+		case BPF_S_ALU_AND_K:
 			A &= fentry->k;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_X:
+		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
-		case BPF_ALU|BPF_OR|BPF_K:
+		case BPF_S_ALU_OR_K:
 			A |= fentry->k;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_X:
+		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
-		case BPF_ALU|BPF_LSH|BPF_K:
+		case BPF_S_ALU_LSH_K:
 			A <<= fentry->k;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_X:
+		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
-		case BPF_ALU|BPF_RSH|BPF_K:
+		case BPF_S_ALU_RSH_K:
 			A >>= fentry->k;
 			continue;
-		case BPF_ALU|BPF_NEG:
+		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
-		case BPF_JMP|BPF_JA:
+		case BPF_S_JMP_JA:
 			pc += fentry->k;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_K:
+		case BPF_S_JMP_JGT_K:
 			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_K:
+		case BPF_S_JMP_JGE_K:
 			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_K:
+		case BPF_S_JMP_JEQ_K:
 			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_K:
+		case BPF_S_JMP_JSET_K:
 			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGT|BPF_X:
+		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JGE|BPF_X:
+		case BPF_S_JMP_JGE_X:
 			pc += (A >= X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JEQ|BPF_X:
+		case BPF_S_JMP_JEQ_X:
 			pc += (A == X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_JMP|BPF_JSET|BPF_X:
+		case BPF_S_JMP_JSET_X:
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
-		case BPF_LD|BPF_W|BPF_ABS:
+		case BPF_S_LD_W_ABS:
 			k = fentry->k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
@@ -217,7 +217,7 @@ load_w:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_H|BPF_ABS:
+		case BPF_S_LD_H_ABS:
 			k = fentry->k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
@@ -226,7 +226,7 @@ load_h:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_B|BPF_ABS:
+		case BPF_S_LD_B_ABS:
 			k = fentry->k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
@@ -235,54 +235,54 @@ load_b:
 				continue;
 			}
 			break;
-		case BPF_LD|BPF_W|BPF_LEN:
+		case BPF_S_LD_W_LEN:
 			A = skb->len;
 			continue;
-		case BPF_LDX|BPF_W|BPF_LEN:
+		case BPF_S_LDX_W_LEN:
 			X = skb->len;
 			continue;
-		case BPF_LD|BPF_W|BPF_IND:
+		case BPF_S_LD_W_IND:
 			k = X + fentry->k;
 			goto load_w;
-		case BPF_LD|BPF_H|BPF_IND:
+		case BPF_S_LD_H_IND:
 			k = X + fentry->k;
 			goto load_h;
-		case BPF_LD|BPF_B|BPF_IND:
+		case BPF_S_LD_B_IND:
 			k = X + fentry->k;
 			goto load_b;
-		case BPF_LDX|BPF_B|BPF_MSH:
+		case BPF_S_LDX_B_MSH:
 			ptr = load_pointer(skb, fentry->k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
-		case BPF_LD|BPF_IMM:
+		case BPF_S_LD_IMM:
 			A = fentry->k;
 			continue;
-		case BPF_LDX|BPF_IMM:
+		case BPF_S_LDX_IMM:
 			X = fentry->k;
 			continue;
-		case BPF_LD|BPF_MEM:
+		case BPF_S_LD_MEM:
 			A = mem[fentry->k];
 			continue;
-		case BPF_LDX|BPF_MEM:
+		case BPF_S_LDX_MEM:
 			X = mem[fentry->k];
 			continue;
-		case BPF_MISC|BPF_TAX:
+		case BPF_S_MISC_TAX:
 			X = A;
 			continue;
-		case BPF_MISC|BPF_TXA:
+		case BPF_S_MISC_TXA:
 			A = X;
 			continue;
-		case BPF_RET|BPF_K:
+		case BPF_S_RET_K:
 			return fentry->k;
-		case BPF_RET|BPF_A:
+		case BPF_S_RET_A:
 			return A;
-		case BPF_ST:
+		case BPF_S_ST:
 			mem[fentry->k] = A;
 			continue;
-		case BPF_STX:
+		case BPF_S_STX:
 			mem[fentry->k] = X;
 			continue;
 		default:
@@ -390,53 +390,128 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 		/* Only allow valid instructions */
 		switch (ftest->code) {
 		case BPF_ALU|BPF_ADD|BPF_K:
+			ftest->code = BPF_S_ALU_ADD_K;
+			break;
 		case BPF_ALU|BPF_ADD|BPF_X:
+			ftest->code = BPF_S_ALU_ADD_X;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_K:
+			ftest->code = BPF_S_ALU_SUB_K;
+			break;
 		case BPF_ALU|BPF_SUB|BPF_X:
+			ftest->code = BPF_S_ALU_SUB_X;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_K:
+			ftest->code = BPF_S_ALU_MUL_K;
+			break;
 		case BPF_ALU|BPF_MUL|BPF_X:
+			ftest->code = BPF_S_ALU_MUL_X;
+			break;
 		case BPF_ALU|BPF_DIV|BPF_X:
+			ftest->code = BPF_S_ALU_DIV_X;
+			break;
 		case BPF_ALU|BPF_AND|BPF_K:
+			ftest->code = BPF_S_ALU_AND_K;
+			break;
 		case BPF_ALU|BPF_AND|BPF_X:
+			ftest->code = BPF_S_ALU_AND_X;
+			break;
 		case BPF_ALU|BPF_OR|BPF_K:
+			ftest->code = BPF_S_ALU_OR_K;
+			break;
 		case BPF_ALU|BPF_OR|BPF_X:
+			ftest->code = BPF_S_ALU_OR_X;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_K:
+			ftest->code = BPF_S_ALU_LSH_K;
+			break;
 		case BPF_ALU|BPF_LSH|BPF_X:
+			ftest->code = BPF_S_ALU_LSH_X;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_K:
+			ftest->code = BPF_S_ALU_RSH_K;
+			break;
 		case BPF_ALU|BPF_RSH|BPF_X:
+			ftest->code = BPF_S_ALU_RSH_X;
+			break;
 		case BPF_ALU|BPF_NEG:
+			ftest->code = BPF_S_ALU_NEG;
+			break;
 		case BPF_LD|BPF_W|BPF_ABS:
+			ftest->code = BPF_S_LD_W_ABS;
+			break;
 		case BPF_LD|BPF_H|BPF_ABS:
+			ftest->code = BPF_S_LD_H_ABS;
+			break;
 		case BPF_LD|BPF_B|BPF_ABS:
+			ftest->code = BPF_S_LD_B_ABS;
+			break;
 		case BPF_LD|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LD_W_LEN;
+			break;
 		case BPF_LD|BPF_W|BPF_IND:
+			ftest->code = BPF_S_LD_W_IND;
+			break;
 		case BPF_LD|BPF_H|BPF_IND:
+			ftest->code = BPF_S_LD_H_IND;
+			break;
 		case BPF_LD|BPF_B|BPF_IND:
+			ftest->code = BPF_S_LD_B_IND;
+			break;
 		case BPF_LD|BPF_IMM:
+			ftest->code = BPF_S_LD_IMM;
+			break;
 		case BPF_LDX|BPF_W|BPF_LEN:
+			ftest->code = BPF_S_LDX_W_LEN;
+			break;
 		case BPF_LDX|BPF_B|BPF_MSH:
+			ftest->code = BPF_S_LDX_B_MSH;
+			break;
 		case BPF_LDX|BPF_IMM:
+			ftest->code = BPF_S_LDX_IMM;
+			break;
 		case BPF_MISC|BPF_TAX:
+			ftest->code = BPF_S_MISC_TAX;
+			break;
 		case BPF_MISC|BPF_TXA:
+			ftest->code = BPF_S_MISC_TXA;
+			break;
 		case BPF_RET|BPF_K:
+			ftest->code = BPF_S_RET_K;
+			break;
 		case BPF_RET|BPF_A:
+			ftest->code = BPF_S_RET_A;
 			break;
 
 		/* Some instructions need special checks */
 
-		case BPF_ALU|BPF_DIV|BPF_K:
 			/* check for division by zero */
+		case BPF_ALU|BPF_DIV|BPF_K:
 			if (ftest->k == 0)
 				return -EINVAL;
+			ftest->code = BPF_S_ALU_DIV_K;
 			break;
 
+		/* check for invalid memory addresses */
 		case BPF_LD|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LD_MEM;
+			break;
 		case BPF_LDX|BPF_MEM:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_LDX_MEM;
+			break;
 		case BPF_ST:
+			if (ftest->k >= BPF_MEMWORDS)
+				return -EINVAL;
+			ftest->code = BPF_S_ST;
+			break;
 		case BPF_STX:
-			/* check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
+			ftest->code = BPF_S_STX;
 			break;
 
 		case BPF_JMP|BPF_JA:
@@ -447,28 +522,63 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 			 */
 			if (ftest->k >= (unsigned)(flen-pc-1))
 				return -EINVAL;
+			ftest->code = BPF_S_JMP_JA;
 			break;
 
 		case BPF_JMP|BPF_JEQ|BPF_K:
+			ftest->code = BPF_S_JMP_JEQ_K;
+			break;
 		case BPF_JMP|BPF_JEQ|BPF_X:
+			ftest->code = BPF_S_JMP_JEQ_X;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_K:
+			ftest->code = BPF_S_JMP_JGE_K;
+			break;
 		case BPF_JMP|BPF_JGE|BPF_X:
+			ftest->code = BPF_S_JMP_JGE_X;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_K:
+			ftest->code = BPF_S_JMP_JGT_K;
+			break;
 		case BPF_JMP|BPF_JGT|BPF_X:
+			ftest->code = BPF_S_JMP_JGT_X;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_K:
+			ftest->code = BPF_S_JMP_JSET_K;
+			break;
 		case BPF_JMP|BPF_JSET|BPF_X:
+			ftest->code = BPF_S_JMP_JSET_X;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
 			/* for conditionals both must be safe */
+		switch (ftest->code) {
+		case BPF_S_JMP_JEQ_K:
+		case BPF_S_JMP_JEQ_X:
+		case BPF_S_JMP_JGE_K:
+		case BPF_S_JMP_JGE_X:
+		case BPF_S_JMP_JGT_K:
+		case BPF_S_JMP_JGT_X:
+		case BPF_S_JMP_JSET_X:
+		case BPF_S_JMP_JSET_K:
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
-			break;
+		}
+	}
 
+	/* last instruction must be a RET code */
+	switch (filter[flen - 1].code) {
+	case BPF_S_RET_K:
+	case BPF_S_RET_A:
+		return 0;
+		break;
 		default:
 			return -EINVAL;
 		}
-	}
-
-	return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
 
-- 
cgit v1.2.3-58-ga151


From f9467eaec36a4ee13f5d282d2512f48e118a3120 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 21 Jun 2010 12:29:14 +0000
Subject: net/core/pktgen.c: Use pr_<level>

Add pr_fmt(fmt) KBUILD_MODNAME ": " fmt
Remove "pktgen: " from formats
Convert printks to pr_<level>
Added func_enter() for debugging
Moved version to end of string at module_init
Coalesced long formats

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 142 ++++++++++++++++++++++++------------------------------
 1 file changed, 64 insertions(+), 78 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6428653e9498..1ee2ebd9b04f 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -115,6 +115,9 @@
  * command by Adit Ranadive <adit.262@gmail.com>
  *
  */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sys.h>
 #include <linux/types.h>
 #include <linux/module.h>
@@ -174,6 +177,8 @@
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
 #define MPLS_STACK_BOTTOM htonl(0x00000100)
 
+#define func_enter() pr_debug("entering %s\n", __func__);
+
 /* Device flag bits */
 #define F_IPSRC_RND   (1<<0)	/* IP-Src Random  */
 #define F_IPDST_RND   (1<<1)	/* IP-Dst Random  */
@@ -424,7 +429,8 @@ static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2)
 }
 
 static const char version[] =
-	"pktgen " VERSION ": Packet Generator for packet performance testing.\n";
+	"Packet Generator for packet performance testing. "
+	"Version: " VERSION "\n";
 
 static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
 static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
@@ -495,7 +501,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
 		pktgen_reset_all_threads();
 
 	else
-		printk(KERN_WARNING "pktgen: Unknown command: %s\n", data);
+		pr_warning("Unknown command: %s\n", data);
 
 	err = count;
 
@@ -852,14 +858,14 @@ static ssize_t pktgen_if_write(struct file *file,
 	pg_result = &(pkt_dev->result[0]);
 
 	if (count < 1) {
-		printk(KERN_WARNING "pktgen: wrong command format\n");
+		pr_warning("wrong command format\n");
 		return -EINVAL;
 	}
 
 	max = count - i;
 	tmp = count_trail_chars(&user_buffer[i], max);
 	if (tmp < 0) {
-		printk(KERN_WARNING "pktgen: illegal format\n");
+		pr_warning("illegal format\n");
 		return tmp;
 	}
 	i += tmp;
@@ -990,9 +996,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			return len;
 		pkt_dev->delay = pkt_dev->min_pkt_size*8*NSEC_PER_USEC/value;
 		if (debug)
-			printk(KERN_INFO
-				 "pktgen: Delay set at: %llu ns\n",
-					pkt_dev->delay);
+			pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
 
 		sprintf(pg_result, "OK: rate=%lu", value);
 		return count;
@@ -1007,9 +1011,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			return len;
 		pkt_dev->delay = NSEC_PER_SEC/value;
 		if (debug)
-			printk(KERN_INFO
-				 "pktgen: Delay set at: %llu ns\n",
-					pkt_dev->delay);
+			pr_info("Delay set at: %llu ns\n", pkt_dev->delay);
 
 		sprintf(pg_result, "OK: rate=%lu", value);
 		return count;
@@ -1815,7 +1817,7 @@ static ssize_t pktgen_thread_write(struct file *file,
 		       name, (unsigned long)count);
 
 	if (!t) {
-		printk(KERN_ERR "pktgen: ERROR: No thread\n");
+		pr_err("ERROR: No thread\n");
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1908,7 +1910,7 @@ static void pktgen_mark_device(const char *ifname)
 	int i = 0;
 
 	mutex_lock(&pktgen_thread_lock);
-	pr_debug("pktgen: pktgen_mark_device marking %s for removal\n", ifname);
+	pr_debug("%s: marking %s for removal\n", __func__, ifname);
 
 	while (1) {
 
@@ -1917,15 +1919,14 @@ static void pktgen_mark_device(const char *ifname)
 			break;	/* success */
 
 		mutex_unlock(&pktgen_thread_lock);
-		pr_debug("pktgen: pktgen_mark_device waiting for %s "
-				"to disappear....\n", ifname);
+		pr_debug("%s: waiting for %s to disappear....\n",
+			 __func__, ifname);
 		schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try));
 		mutex_lock(&pktgen_thread_lock);
 
 		if (++i >= max_tries) {
-			printk(KERN_ERR "pktgen_mark_device: timed out after "
-			       "waiting %d msec for device %s to be removed\n",
-			       msec_per_try * i, ifname);
+			pr_err("%s: timed out after waiting %d msec for device %s to be removed\n",
+			       __func__, msec_per_try * i, ifname);
 			break;
 		}
 
@@ -1952,8 +1953,8 @@ static void pktgen_change_name(struct net_device *dev)
 							  &pktgen_if_fops,
 							  pkt_dev);
 			if (!pkt_dev->entry)
-				printk(KERN_ERR "pktgen: can't move proc "
-				       " entry for '%s'\n", dev->name);
+				pr_err("can't move proc entry for '%s'\n",
+				       dev->name);
 			break;
 		}
 	}
@@ -2017,15 +2018,15 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname)
 
 	odev = pktgen_dev_get_by_name(pkt_dev, ifname);
 	if (!odev) {
-		printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname);
+		pr_err("no such netdevice: \"%s\"\n", ifname);
 		return -ENODEV;
 	}
 
 	if (odev->type != ARPHRD_ETHER) {
-		printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname);
+		pr_err("not an ethernet device: \"%s\"\n", ifname);
 		err = -EINVAL;
 	} else if (!netif_running(odev)) {
-		printk(KERN_ERR "pktgen: device is down: \"%s\"\n", ifname);
+		pr_err("device is down: \"%s\"\n", ifname);
 		err = -ENETDOWN;
 	} else {
 		pkt_dev->odev = odev;
@@ -2044,8 +2045,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 	int ntxq;
 
 	if (!pkt_dev->odev) {
-		printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in "
-		       "setup_inject.\n");
+		pr_err("ERROR: pkt_dev->odev == NULL in setup_inject\n");
 		sprintf(pkt_dev->result,
 			"ERROR: pkt_dev->odev == NULL in setup_inject.\n");
 		return;
@@ -2055,19 +2055,15 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 	ntxq = pkt_dev->odev->real_num_tx_queues;
 
 	if (ntxq <= pkt_dev->queue_map_min) {
-		printk(KERN_WARNING "pktgen: WARNING: Requested "
-		       "queue_map_min (zero-based) (%d) exceeds valid range "
-		       "[0 - %d] for (%d) queues on %s, resetting\n",
-		       pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
-		       pkt_dev->odevname);
+		pr_warning("WARNING: Requested queue_map_min (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+			   pkt_dev->queue_map_min, (ntxq ?: 1) - 1, ntxq,
+			   pkt_dev->odevname);
 		pkt_dev->queue_map_min = ntxq - 1;
 	}
 	if (pkt_dev->queue_map_max >= ntxq) {
-		printk(KERN_WARNING "pktgen: WARNING: Requested "
-		       "queue_map_max (zero-based) (%d) exceeds valid range "
-		       "[0 - %d] for (%d) queues on %s, resetting\n",
-		       pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
-		       pkt_dev->odevname);
+		pr_warning("WARNING: Requested queue_map_max (zero-based) (%d) exceeds valid range [0 - %d] for (%d) queues on %s, resetting\n",
+			   pkt_dev->queue_map_max, (ntxq ?: 1) - 1, ntxq,
+			   pkt_dev->odevname);
 		pkt_dev->queue_map_max = ntxq - 1;
 	}
 
@@ -2127,8 +2123,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 			}
 			rcu_read_unlock();
 			if (err)
-				printk(KERN_ERR "pktgen: ERROR: IPv6 link "
-				       "address not availble.\n");
+				pr_err("ERROR: IPv6 link address not available\n");
 		}
 #endif
 	} else {
@@ -2562,8 +2557,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
 			if (nhead > 0) {
 				ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
 				if (ret < 0) {
-					printk(KERN_ERR "Error expanding "
-					       "ipsec packet %d\n", ret);
+					pr_err("Error expanding ipsec packet %d\n",
+					       ret);
 					goto err;
 				}
 			}
@@ -2572,8 +2567,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
 			skb_pull(skb, ETH_HLEN);
 			ret = pktgen_output_ipsec(skb, pkt_dev);
 			if (ret) {
-				printk(KERN_ERR "Error creating ipsec "
-				       "packet %d\n", ret);
+				pr_err("Error creating ipsec packet %d\n", ret);
 				goto err;
 			}
 			/* restore ll */
@@ -3049,8 +3043,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	if (datalen < sizeof(struct pktgen_hdr)) {
 		datalen = sizeof(struct pktgen_hdr);
 		if (net_ratelimit())
-			printk(KERN_INFO "pktgen: increased datalen to %d\n",
-			       datalen);
+			pr_info("increased datalen to %d\n", datalen);
 	}
 
 	udph->source = htons(pkt_dev->cur_udp_src);
@@ -3177,7 +3170,7 @@ static void pktgen_run(struct pktgen_thread *t)
 	struct pktgen_dev *pkt_dev;
 	int started = 0;
 
-	pr_debug("pktgen: entering pktgen_run. %p\n", t);
+	func_enter();
 
 	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
@@ -3210,7 +3203,7 @@ static void pktgen_stop_all_threads_ifs(void)
 {
 	struct pktgen_thread *t;
 
-	pr_debug("pktgen: entering pktgen_stop_all_threads_ifs.\n");
+	func_enter();
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3275,7 +3268,7 @@ static void pktgen_run_all_threads(void)
 {
 	struct pktgen_thread *t;
 
-	pr_debug("pktgen: entering pktgen_run_all_threads.\n");
+	func_enter();
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3294,7 +3287,7 @@ static void pktgen_reset_all_threads(void)
 {
 	struct pktgen_thread *t;
 
-	pr_debug("pktgen: entering pktgen_reset_all_threads.\n");
+	func_enter();
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3344,8 +3337,8 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 	int nr_frags = pkt_dev->skb ? skb_shinfo(pkt_dev->skb)->nr_frags : -1;
 
 	if (!pkt_dev->running) {
-		printk(KERN_WARNING "pktgen: interface: %s is already "
-		       "stopped\n", pkt_dev->odevname);
+		pr_warning("interface: %s is already stopped\n",
+			   pkt_dev->odevname);
 		return -EINVAL;
 	}
 
@@ -3381,7 +3374,7 @@ static void pktgen_stop(struct pktgen_thread *t)
 {
 	struct pktgen_dev *pkt_dev;
 
-	pr_debug("pktgen: entering pktgen_stop\n");
+	func_enter();
 
 	if_lock(t);
 
@@ -3401,7 +3394,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
 	struct list_head *q, *n;
 	struct pktgen_dev *cur;
 
-	pr_debug("pktgen: entering pktgen_rem_one_if\n");
+	func_enter();
 
 	if_lock(t);
 
@@ -3427,9 +3420,10 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
 	struct list_head *q, *n;
 	struct pktgen_dev *cur;
 
+	func_enter();
+
 	/* Remove all devices, free mem */
 
-	pr_debug("pktgen: entering pktgen_rem_all_ifs\n");
 	if_lock(t);
 
 	list_for_each_safe(q, n, &t->if_list) {
@@ -3511,8 +3505,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 		pkt_dev->skb = fill_packet(odev, pkt_dev);
 		if (pkt_dev->skb == NULL) {
-			printk(KERN_ERR "pktgen: ERROR: couldn't "
-			       "allocate skb in fill_packet.\n");
+			pr_err("ERROR: couldn't allocate skb in fill_packet\n");
 			schedule();
 			pkt_dev->clone_count--;	/* back out increment, OOM */
 			return;
@@ -3592,8 +3585,7 @@ static int pktgen_thread_worker(void *arg)
 	init_waitqueue_head(&t->queue);
 	complete(&t->start_done);
 
-	pr_debug("pktgen: starting pktgen/%d:  pid=%d\n",
-		 cpu, task_pid_nr(current));
+	pr_debug("starting pktgen/%d:  pid=%d\n", cpu, task_pid_nr(current));
 
 	set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3646,13 +3638,13 @@ static int pktgen_thread_worker(void *arg)
 		set_current_state(TASK_INTERRUPTIBLE);
 	}
 
-	pr_debug("pktgen: %s stopping all device\n", t->tsk->comm);
+	pr_debug("%s stopping all device\n", t->tsk->comm);
 	pktgen_stop(t);
 
-	pr_debug("pktgen: %s removing all device\n", t->tsk->comm);
+	pr_debug("%s removing all device\n", t->tsk->comm);
 	pktgen_rem_all_ifs(t);
 
-	pr_debug("pktgen: %s removing thread.\n", t->tsk->comm);
+	pr_debug("%s removing thread\n", t->tsk->comm);
 	pktgen_rem_thread(t);
 
 	return 0;
@@ -3676,7 +3668,7 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t,
 		}
 
 	if_unlock(t);
-	pr_debug("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev);
+	pr_debug("find_dev(%s) returning %p\n", ifname, pkt_dev);
 	return pkt_dev;
 }
 
@@ -3692,8 +3684,7 @@ static int add_dev_to_thread(struct pktgen_thread *t,
 	if_lock(t);
 
 	if (pkt_dev->pg_thread) {
-		printk(KERN_ERR "pktgen: ERROR: already assigned "
-		       "to a thread.\n");
+		pr_err("ERROR: already assigned to a thread\n");
 		rv = -EBUSY;
 		goto out;
 	}
@@ -3719,7 +3710,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 
 	pkt_dev = __pktgen_NN_threads(ifname, FIND);
 	if (pkt_dev) {
-		printk(KERN_ERR "pktgen: ERROR: interface already used.\n");
+		pr_err("ERROR: interface already used\n");
 		return -EBUSY;
 	}
 
@@ -3764,7 +3755,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir,
 					  &pktgen_if_fops, pkt_dev);
 	if (!pkt_dev->entry) {
-		printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
+		pr_err("cannot create %s/%s procfs entry\n",
 		       PG_PROC_DIR, ifname);
 		err = -EINVAL;
 		goto out2;
@@ -3795,8 +3786,7 @@ static int __init pktgen_create_thread(int cpu)
 	t = kzalloc_node(sizeof(struct pktgen_thread), GFP_KERNEL,
 			 cpu_to_node(cpu));
 	if (!t) {
-		printk(KERN_ERR "pktgen: ERROR: out of memory, can't "
-		       "create new thread.\n");
+		pr_err("ERROR: out of memory, can't create new thread\n");
 		return -ENOMEM;
 	}
 
@@ -3810,8 +3800,7 @@ static int __init pktgen_create_thread(int cpu)
 
 	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
-		printk(KERN_ERR "pktgen: kernel_thread() failed "
-		       "for cpu %d\n", t->cpu);
+		pr_err("kernel_thread() failed for cpu %d\n", t->cpu);
 		list_del(&t->th_list);
 		kfree(t);
 		return PTR_ERR(p);
@@ -3822,7 +3811,7 @@ static int __init pktgen_create_thread(int cpu)
 	pe = proc_create_data(t->tsk->comm, 0600, pg_proc_dir,
 			      &pktgen_thread_fops, t);
 	if (!pe) {
-		printk(KERN_ERR "pktgen: cannot create %s/%s procfs entry.\n",
+		pr_err("cannot create %s/%s procfs entry\n",
 		       PG_PROC_DIR, t->tsk->comm);
 		kthread_stop(p);
 		list_del(&t->th_list);
@@ -3856,11 +3845,10 @@ static int pktgen_remove_device(struct pktgen_thread *t,
 				struct pktgen_dev *pkt_dev)
 {
 
-	pr_debug("pktgen: remove_device pkt_dev=%p\n", pkt_dev);
+	pr_debug("remove_device pkt_dev=%p\n", pkt_dev);
 
 	if (pkt_dev->running) {
-		printk(KERN_WARNING "pktgen: WARNING: trying to remove a "
-		       "running interface, stopping it now.\n");
+		pr_warning("WARNING: trying to remove a running interface, stopping it now\n");
 		pktgen_stop_device(pkt_dev);
 	}
 
@@ -3891,7 +3879,7 @@ static int __init pg_init(void)
 	int cpu;
 	struct proc_dir_entry *pe;
 
-	printk(KERN_INFO "%s", version);
+	pr_info("%s", version);
 
 	pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
 	if (!pg_proc_dir)
@@ -3899,8 +3887,7 @@ static int __init pg_init(void)
 
 	pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops);
 	if (pe == NULL) {
-		printk(KERN_ERR "pktgen: ERROR: cannot create %s "
-		       "procfs entry.\n", PGCTRL);
+		pr_err("ERROR: cannot create %s procfs entry\n", PGCTRL);
 		proc_net_remove(&init_net, PG_PROC_DIR);
 		return -EINVAL;
 	}
@@ -3913,13 +3900,12 @@ static int __init pg_init(void)
 
 		err = pktgen_create_thread(cpu);
 		if (err)
-			printk(KERN_WARNING "pktgen: WARNING: Cannot create "
-			       "thread for cpu %d (%d)\n", cpu, err);
+			pr_warning("WARNING: Cannot create thread for cpu %d (%d)\n",
+				   cpu, err);
 	}
 
 	if (list_empty(&pktgen_threads)) {
-		printk(KERN_ERR "pktgen: ERROR: Initialization failed for "
-		       "all threads\n");
+		pr_err("ERROR: Initialization failed for all threads\n");
 		unregister_netdevice_notifier(&pktgen_notifier_block);
 		remove_proc_entry(PGCTRL, pg_proc_dir);
 		proc_net_remove(&init_net, PG_PROC_DIR);
-- 
cgit v1.2.3-58-ga151


From a7d13fbf85375698879d16f118af77fbfcc2de44 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 22 Jun 2010 01:14:34 +0000
Subject: dccp: remove unused function argument

This removes an unused 'sk' argument from several option-inserting functions.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c      |  2 +-
 net/dccp/ccids/ccid3.c |  4 ++--
 net/dccp/dccp.h        | 12 ++++--------
 net/dccp/options.c     | 14 ++++++--------
 4 files changed, 13 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 01e4d39fa232..2abddee48304 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -82,7 +82,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
 	elapsed_time = delta / 10;
 
 	if (elapsed_time != 0 &&
-	    dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
+	    dccp_insert_option_elapsed_time(skb, elapsed_time))
 		return -1;
 
 	avr = dccp_ackvec_record_new();
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index d3235899c7e3..95f752986497 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -715,9 +715,9 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 	x_recv = htonl(hc->rx_x_recv);
 	pinv   = htonl(hc->rx_pinv);
 
-	if (dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
+	if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE,
 			       &pinv, sizeof(pinv)) ||
-	    dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
+	    dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE,
 			       &x_recv, sizeof(x_recv)))
 		return -1;
 
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index a10a61a1ded2..3ccef1b70fee 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -446,16 +446,12 @@ extern void dccp_feat_list_purge(struct list_head *fn_list);
 
 extern int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
 extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
-extern int dccp_insert_option_elapsed_time(struct sock *sk,
-					    struct sk_buff *skb,
-					    u32 elapsed_time);
+extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
 extern u32 dccp_timestamp(void);
 extern void dccp_timestamping_init(void);
-extern int dccp_insert_option_timestamp(struct sock *sk,
-					 struct sk_buff *skb);
-extern int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
-			       unsigned char option,
-			       const void *value, unsigned char len);
+extern int dccp_insert_option_timestamp(struct sk_buff *skb);
+extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
+			      const void *value, unsigned char len);
 
 #ifdef CONFIG_SYSCTL
 extern int dccp_sysctl_init(void);
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 07395f861d35..4973badb5d55 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -299,9 +299,8 @@ static inline u8 dccp_ndp_len(const u64 ndp)
 	return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
 }
 
-int dccp_insert_option(struct sock *sk, struct sk_buff *skb,
-			const unsigned char option,
-			const void *value, const unsigned char len)
+int dccp_insert_option(struct sk_buff *skb, const unsigned char option,
+		       const void *value, const unsigned char len)
 {
 	unsigned char *to;
 
@@ -354,8 +353,7 @@ static inline int dccp_elapsed_time_len(const u32 elapsed_time)
 	return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
 }
 
-int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
-				    u32 elapsed_time)
+int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
 {
 	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
 	const int len = 2 + elapsed_time_len;
@@ -386,13 +384,13 @@ int dccp_insert_option_elapsed_time(struct sock *sk, struct sk_buff *skb,
 
 EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
 
-int dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
+int dccp_insert_option_timestamp(struct sk_buff *skb)
 {
 	__be32 now = htonl(dccp_timestamp());
 	/* yes this will overflow but that is the point as we want a
 	 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */
 
-	return dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now));
+	return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
 }
 
 EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
@@ -533,7 +531,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 			 * Obtain RTT sample from Request/Response exchange.
 			 * This is currently used in CCID 3 initialisation.
 			 */
-			if (dccp_insert_option_timestamp(sk, skb))
+			if (dccp_insert_option_timestamp(skb))
 				return -1;
 
 		} else if (dp->dccps_hc_rx_ackvec != NULL &&
-- 
cgit v1.2.3-58-ga151


From 59b80802a8a18b64d38b51aa168253684b2649d5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Tue, 22 Jun 2010 01:14:35 +0000
Subject: dccp: make implementation of Syn-RTT symmetric

This patch is thanks to Andre Noll who reported the issue and helped testing.

The Syn-RTT sampled during the initial handshake currently only works for
the client sending the DCCP-Request. TFRC penalizes the absence of an RTT
sample with a very slow initial speed (1 packet per second), which delays
slow-start significantly, resulting in sluggish performance.

This patch mirrors the "Syn RTT" principle by adding a timestamp also onto
the DCCP-Response, producing an RTT sample  when the (Data)Ack completing
the handshake arrives.

Also changed the documentation to 'TFRC' since Syn RTTs are also used by CCID-4.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/input.c   | 13 +++++++++++--
 net/dccp/options.c |  6 +++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/dccp/input.c b/net/dccp/input.c
index 6beb6a7d6fba..10c957a88f4f 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -430,7 +430,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		if (dccp_parse_options(sk, NULL, skb))
 			return 1;
 
-		/* Obtain usec RTT sample from SYN exchange (used by CCID 3) */
+		/* Obtain usec RTT sample from SYN exchange (used by TFRC). */
 		if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
 			dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
 			    dp->dccps_options_received.dccpor_timestamp_echo));
@@ -535,6 +535,8 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
 						   const struct dccp_hdr *dh,
 						   const unsigned len)
 {
+	struct dccp_sock *dp = dccp_sk(sk);
+	u32 sample = dp->dccps_options_received.dccpor_timestamp_echo;
 	int queued = 0;
 
 	switch (dh->dccph_type) {
@@ -559,7 +561,14 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
 		if (sk->sk_state == DCCP_PARTOPEN)
 			inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 
-		dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
+		/* Obtain usec RTT sample from SYN exchange (used by TFRC). */
+		if (likely(sample)) {
+			long delta = dccp_timestamp() - sample;
+
+			dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta);
+		}
+
+		dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
 		dccp_set_state(sk, DCCP_OPEN);
 
 		if (dh->dccph_type == DCCP_PKT_DATAACK ||
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 4973badb5d55..bfda087bd90d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -529,7 +529,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
 		if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
 			/*
 			 * Obtain RTT sample from Request/Response exchange.
-			 * This is currently used in CCID 3 initialisation.
+			 * This is currently used for TFRC initialisation.
 			 */
 			if (dccp_insert_option_timestamp(skb))
 				return -1;
@@ -562,6 +562,10 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
 	if (dccp_feat_insert_opts(NULL, dreq, skb))
 		return -1;
 
+	/* Obtain RTT sample from Response/Ack exchange (used by TFRC). */
+	if (dccp_insert_option_timestamp(skb))
+		return -1;
+
 	if (dreq->dreq_timestamp_echo != 0 &&
 	    dccp_insert_option_timestamp_echo(NULL, dreq, skb))
 		return -1;
-- 
cgit v1.2.3-58-ga151


From 4b4194c40f4ac8d03a700845f8978cba53246b5a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Jun 2010 07:43:15 +0000
Subject: arp: RCU change in arp_solicit()

Avoid two atomic ops in arp_solicit()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index cf78f41830ca..09ead1baa99e 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -333,11 +333,14 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	struct net_device *dev = neigh->dev;
 	__be32 target = *(__be32*)neigh->primary_key;
 	int probes = atomic_read(&neigh->probes);
-	struct in_device *in_dev = in_dev_get(dev);
+	struct in_device *in_dev;
 
-	if (!in_dev)
+	rcu_read_lock();
+	in_dev = __in_dev_get_rcu(dev);
+	if (!in_dev) {
+		rcu_read_unlock();
 		return;
-
+	}
 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
 	default:
 	case 0:		/* By default announce any local IP */
@@ -358,9 +361,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
 	case 2:		/* Avoid secondary IPs, get a primary/preferred one */
 		break;
 	}
+	rcu_read_unlock();
 
-	if (in_dev)
-		in_dev_put(in_dev);
 	if (!saddr)
 		saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
 
-- 
cgit v1.2.3-58-ga151


From 1823e4c80eeae2a774c75569ce3035070e5ee009 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Jun 2010 20:58:41 +0000
Subject: snmp: add align parameter to snmp_mib_init()

In preparation for 64bit snmp counters for some mibs,
add an 'align' parameter to snmp_mib_init(), instead
of assuming mibs only contain 'unsigned long' fields.

Callers can use __alignof__(type) to provide correct
alignment.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
CC: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  2 +-
 net/dccp/proto.c       |  3 ++-
 net/ipv4/af_inet.c     | 27 +++++++++++++++++----------
 net/ipv6/addrconf.c    |  9 ++++++---
 net/ipv6/af_inet6.c    | 15 ++++++++++-----
 net/sctp/protocol.c    |  3 ++-
 net/xfrm/xfrm_policy.c |  3 ++-
 7 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index d52f01180361..3b524df7dddb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -178,7 +178,7 @@ extern struct ipv4_config ipv4_config;
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
-extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize);
+extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
 extern void snmp_mib_free(void __percpu *ptr[2]);
 
 extern struct local_ports {
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f79bcef5088f..096250d1323b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1002,7 +1002,8 @@ EXPORT_SYMBOL_GPL(dccp_shutdown);
 static inline int dccp_mib_init(void)
 {
 	return snmp_mib_init((void __percpu **)dccp_statistics,
-			     sizeof(struct dccp_mib));
+			     sizeof(struct dccp_mib),
+			     __alignof__(struct dccp_mib));
 }
 
 static inline void dccp_mib_exit(void)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b4c0969137cb..640db9b90330 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1427,13 +1427,13 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
-int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
+int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[0] = __alloc_percpu(mibsize, align);
 	if (!ptr[0])
 		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
+	ptr[1] = __alloc_percpu(mibsize, align);
 	if (!ptr[1])
 		goto err1;
 	return 0;
@@ -1490,25 +1490,32 @@ static const struct net_protocol icmp_protocol = {
 static __net_init int ipv4_mib_init_net(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
-			  sizeof(struct tcp_mib)) < 0)
+			  sizeof(struct tcp_mib),
+			  __alignof__(struct tcp_mib)) < 0)
 		goto err_tcp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
-			  sizeof(struct linux_mib)) < 0)
+			  sizeof(struct linux_mib),
+			  __alignof__(struct linux_mib)) < 0)
 		goto err_net_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
-			  sizeof(struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
-			  sizeof(struct icmp_mib)) < 0)
+			  sizeof(struct icmp_mib),
+			  __alignof__(struct icmp_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
-			  sizeof(struct icmpmsg_mib)) < 0)
+			  sizeof(struct icmpmsg_mib),
+			  __alignof__(struct icmpmsg_mib)) < 0)
 		goto err_icmpmsg_mib;
 
 	tcp_mib_init(net);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b97bb1f30808..c20a7c260a8f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -284,13 +284,16 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
 	if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip;
 	if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
-			  sizeof(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp;
 	if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
-			  sizeof(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib),
+			  __alignof__(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg;
 
 	return 0;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 94b1b9c954bf..e830cd4f9d0f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -971,19 +971,24 @@ static void ipv6_packet_cleanup(void)
 static int __net_init ipv6_init_mibs(struct net *net)
 {
 	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		return -ENOMEM;
 	if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6,
-			  sizeof (struct udp_mib)) < 0)
+			  sizeof(struct udp_mib),
+			  __alignof__(struct udp_mib)) < 0)
 		goto err_udplite_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics,
-			  sizeof(struct ipstats_mib)) < 0)
+			  sizeof(struct ipstats_mib),
+			  __alignof__(struct ipstats_mib)) < 0)
 		goto err_ip_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics,
-			  sizeof(struct icmpv6_mib)) < 0)
+			  sizeof(struct icmpv6_mib),
+			  __alignof__(struct icmpv6_mib)) < 0)
 		goto err_icmp_mib;
 	if (snmp_mib_init((void __percpu **)net->mib.icmpv6msg_statistics,
-			  sizeof(struct icmpv6msg_mib)) < 0)
+			  sizeof(struct icmpv6msg_mib),
+			  __alignof__(struct icmpv6msg_mib)) < 0)
 		goto err_icmpmsg_mib;
 	return 0;
 
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a0e1a7fdebbf..c0e162aeb0bd 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1002,7 +1002,8 @@ int sctp_register_pf(struct sctp_pf *pf, sa_family_t family)
 static inline int init_sctp_mibs(void)
 {
 	return snmp_mib_init((void __percpu **)sctp_statistics,
-			     sizeof(struct sctp_mib));
+			     sizeof(struct sctp_mib),
+			     __alignof__(struct sctp_mib));
 }
 
 static inline void cleanup_sctp_mibs(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4bf27d901333..593c06be6b62 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2480,7 +2480,8 @@ static int __net_init xfrm_statistics_init(struct net *net)
 	int rv;
 
 	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
-			  sizeof(struct linux_xfrm_mib)) < 0)
+			  sizeof(struct linux_xfrm_mib),
+			  __alignof__(struct linux_xfrm_mib)) < 0)
 		return -ENOMEM;
 	rv = xfrm_proc_init(net);
 	if (rv < 0)
-- 
cgit v1.2.3-58-ga151


From 9587c6ddd452314e8ed5707ad832a507a030ef57 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 23 Jun 2010 02:51:25 +0000
Subject: ipv6: remove ipv6_statistics

commit 9261e5370112 (ipv6: making ip and icmp statistics per/namespace)
forgot to remove ipv6_statistics variable.

commit bc417d99bf27 (ipv6: remove stale MIB definitions) took care of
icmpv6_statistics & icmpv6msg_statistics

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Denis V. Lunev <den@openvz.org>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index bd43f0152c21..a7f66bc8f0b0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -55,8 +55,6 @@
 
 #include <asm/uaccess.h>
 
-DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics) __read_mostly;
-
 struct ip6_ra_chain *ip6_ra_chain;
 DEFINE_RWLOCK(ip6_ra_lock);
 
-- 
cgit v1.2.3-58-ga151


From 734f614bc1e7c6bf075d201f6bd9a555b8b4a984 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 21 Jun 2010 11:48:44 +0000
Subject: syncookies: do not store rcv_wscale in tcp timestamp

As pointed out by Fernando Gont there is no need to encode rcv_wscale
into the cookie.

We did not use the restored rcv_wscale anyway; it is recomputed
via tcp_select_initial_window().

Thus we can save 4 bits in the ts option space by removing rcv_wscale.
In case window scaling was not supported, we set the (invalid) wscale
value 0xf.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 35 ++++++++++++++---------------------
 net/ipv6/syncookies.c |  1 -
 2 files changed, 14 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 51b5662545d6..8896329aebd0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -18,8 +18,8 @@
 #include <net/tcp.h>
 #include <net/route.h>
 
-/* Timestamps: lowest 9 bits store TCP options */
-#define TSBITS 9
+/* Timestamps: lowest bits store TCP options */
+#define TSBITS 5
 #define TSMASK (((__u32)1 << TSBITS) - 1)
 
 extern int sysctl_tcp_syncookies;
@@ -58,7 +58,7 @@ static u32 cookie_hash(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport,
 
 /*
  * when syncookies are in effect and tcp timestamps are enabled we encode
- * tcp options in the lowest 9 bits of the timestamp value that will be
+ * tcp options in the lower bits of the timestamp value that will be
  * sent in the syn-ack.
  * Since subsequent timestamps use the normal tcp_time_stamp value, we
  * must make sure that the resulting initial timestamp is <= tcp_time_stamp.
@@ -70,11 +70,9 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 	u32 options = 0;
 
 	ireq = inet_rsk(req);
-	if (ireq->wscale_ok) {
-		options = ireq->snd_wscale;
-		options |= ireq->rcv_wscale << 4;
-	}
-	options |= ireq->sack_ok << 8;
+
+	options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
+	options |= ireq->sack_ok << 4;
 
 	ts = ts_now & ~TSMASK;
 	ts |= options;
@@ -227,15 +225,14 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * additional tcp options in the timestamp.
  * This extracts these options from the timestamp echo.
  *
- * The lowest 4 bits are for snd_wscale
- * The next 4 lsb are for rcv_wscale
+ * The lowest 4 bits store snd_wscale.
  * The next lsb is for sack_ok
  *
  * return false if we decode an option that should not be.
  */
 bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 {
-	/* echoed timestamp, 9 lowest bits contain options */
+	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
 
 	if (!tcp_opt->saw_tstamp)  {
@@ -246,20 +243,17 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 	if (!sysctl_tcp_timestamps)
 		return false;
 
-	tcp_opt->snd_wscale = options & 0xf;
-	options >>= 4;
-	tcp_opt->rcv_wscale = options & 0xf;
-
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
 		return false;
 
-	if (tcp_opt->snd_wscale || tcp_opt->rcv_wscale) {
-		tcp_opt->wscale_ok = 1;
-		return sysctl_tcp_window_scaling != 0;
-	}
-	return true;
+	if ((options & 0xf) == 0xf)
+		return true; /* no window scaling */
+
+	tcp_opt->wscale_ok = 1;
+	tcp_opt->snd_wscale = options & 0xf;
+	return sysctl_tcp_window_scaling != 0;
 }
 EXPORT_SYMBOL(cookie_check_timestamp);
 
@@ -313,7 +307,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
 	ireq->ecn_ok		= 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
-	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index c7ee57421ece..84d818cfae17 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -217,7 +217,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->retrans = 0;
 	ireq->ecn_ok		= 0;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
-	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
 	ireq->tstamp_ok		= tcp_opt.saw_tstamp;
-- 
cgit v1.2.3-58-ga151


From 172d69e63c7f1e8300d0e1c1bbd8eb0f630faa15 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 21 Jun 2010 11:48:45 +0000
Subject: syncookies: add support for ECN

Allows use of ECN when syncookies are in effect by encoding ecn_ok
into the syn-ack tcp timestamp.

While at it, remove a uneeded #ifdef CONFIG_SYN_COOKIES.
With CONFIG_SYN_COOKIES=nm want_cookie is ifdef'd to 0 and gcc
removes the "if (0)".

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  2 +-
 net/ipv4/syncookies.c | 15 ++++++++++-----
 net/ipv4/tcp_ipv4.c   |  6 ++----
 net/ipv6/syncookies.c |  5 +++--
 net/ipv6/tcp_ipv6.c   |  2 +-
 5 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 18c246c9b009..c2f96c2cc89c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -464,7 +464,7 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 				     __u16 *mss);
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern bool cookie_check_timestamp(struct tcp_options_received *tcp_opt);
+extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 8896329aebd0..650cace2180d 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,7 +19,7 @@
 #include <net/route.h>
 
 /* Timestamps: lowest bits store TCP options */
-#define TSBITS 5
+#define TSBITS 6
 #define TSMASK (((__u32)1 << TSBITS) - 1)
 
 extern int sysctl_tcp_syncookies;
@@ -73,6 +73,7 @@ __u32 cookie_init_timestamp(struct request_sock *req)
 
 	options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
 	options |= ireq->sack_ok << 4;
+	options |= ireq->ecn_ok << 5;
 
 	ts = ts_now & ~TSMASK;
 	ts |= options;
@@ -226,11 +227,11 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  * This extracts these options from the timestamp echo.
  *
  * The lowest 4 bits store snd_wscale.
- * The next lsb is for sack_ok
+ * next 2 bits indicate SACK and ECN support.
  *
  * return false if we decode an option that should not be.
  */
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 {
 	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
@@ -244,6 +245,9 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt)
 		return false;
 
 	tcp_opt->sack_ok = (options >> 4) & 0x1;
+	*ecn_ok = (options >> 5) & 1;
+	if (*ecn_ok && !sysctl_tcp_ecn)
+		return false;
 
 	if (tcp_opt->sack_ok && !sysctl_tcp_sack)
 		return false;
@@ -272,6 +276,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	int mss;
 	struct rtable *rt;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -288,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -305,7 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e41e6f92968..8fa32f5ae2ce 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1328,14 +1328,12 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_free;
 
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (want_cookie) {
-#ifdef CONFIG_SYN_COOKIES
-		req->cookie_ts = tmp_opt.tstamp_ok;
-#endif
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+		req->cookie_ts = tmp_opt.tstamp_ok;
 	} else if (!isn) {
 		struct inet_peer *peer = NULL;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 84d818cfae17..09fd34f0dbf2 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -164,6 +164,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	int mss;
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
+	bool ecn_ok;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -180,7 +181,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
 
-	if (!cookie_check_timestamp(&tcp_opt))
+	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
 		goto out;
 
 	ret = NULL;
@@ -215,7 +216,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	req->expires = 0UL;
 	req->retrans = 0;
-	ireq->ecn_ok		= 0;
+	ireq->ecn_ok		= ecn_ok;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
 	ireq->wscale_ok		= tcp_opt.wscale_ok;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f87534569366..5ebc27ecebdc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1269,7 +1269,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	if (!want_cookie)
+	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
 	if (!isn) {
-- 
cgit v1.2.3-58-ga151


From cf377eb4aeded926375d4d0fe0b66ba95f0521e1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 28 Jun 2010 14:12:41 +0200
Subject: netfilter: ipt_LOG/ip6t_LOG: remove comparison within loop

Remove the comparison within the loop to print the macheader by prepending
the colon to all but the first printk.

Based on suggestion by Jan Engelhardt <jengelh@medozas.de>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_LOG.c  | 12 ++++++------
 net/ipv6/netfilter/ip6t_LOG.c |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 5234f4f3499a..0a452a54adbe 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -411,12 +411,12 @@ ipt_log_packet(u_int8_t pf,
 		    skb->mac_header != skb->network_header) {
 			int i;
 			const unsigned char *p = skb_mac_header(skb);
-			for (i = 0; i < skb->dev->hard_header_len; i++,p++)
-				printk("%02x%c", *p,
-				       i==skb->dev->hard_header_len - 1
-				       ? ' ':':');
-		} else
-			printk(" ");
+
+			printk("%02x", *p++);
+			for (i = 1; i < skb->dev->hard_header_len; i++, p++)
+				printk(":%02x", *p);
+		}
+		printk(" ");
 	}
 
 	dump_packet(loginfo, skb, 0);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index af4ee11f2066..4c7ddac7c62b 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -414,9 +414,9 @@ ip6t_log_packet(u_int8_t pf,
 				p = NULL;
 
 			if (p != NULL) {
-				for (i = 0; i < len; i++)
-					printk("%02x%s", p[i],
-					       i == len - 1 ? "" : ":");
+				printk("%02x", *p++);
+				for (i = 1; i < len; i++)
+					printk(":%02x", p[i]);
 			}
 			printk(" ");
 
-- 
cgit v1.2.3-58-ga151


From 7eb9282cd0efac08b8377cbd5037ba297c77e3f7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 28 Jun 2010 14:16:08 +0200
Subject: netfilter: ipt_LOG/ip6t_LOG: add option to print decoded MAC header

The LOG targets print the entire MAC header as one long string, which is not
readable very well:

IN=eth0 OUT= MAC=00:15:f2:24:91:f8:00:1b:24:dc:61:e6:08:00 ...

Add an option to decode known header formats (currently just ARPHRD_ETHER devices)
in their individual fields:

IN=eth0 OUT= MACSRC=00:1b:24:dc:61:e6 MACDST=00:15:f2:24:91:f8 MACPROTO=0800 ...
IN=eth0 OUT= MACSRC=00:1b:24:dc:61:e6 MACDST=00:15:f2:24:91:f8 MACPROTO=86dd ...

The option needs to be explicitly enabled by userspace to avoid breaking
existing parsers.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_ipv4/ipt_LOG.h  |  3 +-
 include/linux/netfilter_ipv6/ip6t_LOG.h |  3 +-
 net/ipv4/netfilter/ipt_LOG.c            | 54 ++++++++++++++++------
 net/ipv6/netfilter/ip6t_LOG.c           | 81 +++++++++++++++++++++------------
 4 files changed, 97 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h
index 90fa6525ef9c..dcdbadf9fd4a 100644
--- a/include/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/linux/netfilter_ipv4/ipt_LOG.h
@@ -7,7 +7,8 @@
 #define IPT_LOG_IPOPT		0x04	/* Log IP options */
 #define IPT_LOG_UID		0x08	/* Log UID owning local socket */
 #define IPT_LOG_NFLOG		0x10	/* Unsupported, don't reuse */
-#define IPT_LOG_MASK		0x1f
+#define IPT_LOG_MACDECODE	0x20	/* Decode MAC header */
+#define IPT_LOG_MASK		0x2f
 
 struct ipt_log_info {
 	unsigned char level;
diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h
index 0d0119b0458c..9dd5579e02ec 100644
--- a/include/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/linux/netfilter_ipv6/ip6t_LOG.h
@@ -7,7 +7,8 @@
 #define IP6T_LOG_IPOPT		0x04	/* Log IP options */
 #define IP6T_LOG_UID		0x08	/* Log UID owning local socket */
 #define IP6T_LOG_NFLOG		0x10	/* Unsupported, don't use */
-#define IP6T_LOG_MASK		0x1f
+#define IP6T_LOG_MACDECODE	0x20	/* Decode MAC header */
+#define IP6T_LOG_MASK		0x2f
 
 struct ip6t_log_info {
 	unsigned char level;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 0a452a54adbe..915fc17d7ce2 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <net/icmp.h>
 #include <net/udp.h>
@@ -363,6 +364,42 @@ static void dump_packet(const struct nf_loginfo *info,
 	/* maxlen = 230+   91  + 230 + 252 = 803 */
 }
 
+static void dump_mac_header(const struct nf_loginfo *info,
+			    const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	unsigned int logflags = 0;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+
+	if (!(logflags & IPT_LOG_MACDECODE))
+		goto fallback;
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+		printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+		       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	default:
+		break;
+	}
+
+fallback:
+	printk("MAC=");
+	if (dev->hard_header_len &&
+	    skb->mac_header != skb->network_header) {
+		const unsigned char *p = skb_mac_header(skb);
+		unsigned int i;
+
+		printk("%02x", *p++);
+		for (i = 1; i < dev->hard_header_len; i++, p++)
+			printk(":%02x", *p);
+	}
+	printk(" ");
+}
+
 static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
@@ -404,20 +441,9 @@ ipt_log_packet(u_int8_t pf,
 	}
 #endif
 
-	if (in && !out) {
-		/* MAC logging for input chain only. */
-		printk("MAC=");
-		if (skb->dev && skb->dev->hard_header_len &&
-		    skb->mac_header != skb->network_header) {
-			int i;
-			const unsigned char *p = skb_mac_header(skb);
-
-			printk("%02x", *p++);
-			for (i = 1; i < skb->dev->hard_header_len; i++, p++)
-				printk(":%02x", *p);
-		}
-		printk(" ");
-	}
+	/* MAC logging for input path only. */
+	if (in && !out)
+		dump_mac_header(loginfo, skb);
 
 	dump_packet(loginfo, skb, 0);
 	printk("\n");
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 4c7ddac7c62b..0a07ae7b933f 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -373,6 +373,56 @@ static void dump_packet(const struct nf_loginfo *info,
 		printk("MARK=0x%x ", skb->mark);
 }
 
+static void dump_mac_header(const struct nf_loginfo *info,
+			    const struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	unsigned int logflags = 0;
+
+	if (info->type == NF_LOG_TYPE_LOG)
+		logflags = info->u.log.logflags;
+
+	if (!(logflags & IP6T_LOG_MACDECODE))
+		goto fallback;
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+		printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+		       ntohs(eth_hdr(skb)->h_proto));
+		return;
+	default:
+		break;
+	}
+
+fallback:
+	printk("MAC=");
+	if (dev->hard_header_len &&
+	    skb->mac_header != skb->network_header) {
+		const unsigned char *p = skb_mac_header(skb);
+		unsigned int len = dev->hard_header_len;
+		unsigned int i;
+
+		if (dev->type == ARPHRD_SIT &&
+		    (p -= ETH_HLEN) < skb->head)
+			p = NULL;
+
+		if (p != NULL) {
+			printk("%02x", *p++);
+			for (i = 1; i < len; i++)
+				printk(":%02x", p[i]);
+		}
+		printk(" ");
+
+		if (dev->type == ARPHRD_SIT) {
+			const struct iphdr *iph =
+				(struct iphdr *)skb_mac_header(skb);
+			printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
+		}
+	} else
+		printk(" ");
+}
+
 static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
@@ -400,35 +450,10 @@ ip6t_log_packet(u_int8_t pf,
 		prefix,
 		in ? in->name : "",
 		out ? out->name : "");
-	if (in && !out) {
-		unsigned int len;
-		/* MAC logging for input chain only. */
-		printk("MAC=");
-		if (skb->dev && (len = skb->dev->hard_header_len) &&
-		    skb->mac_header != skb->network_header) {
-			const unsigned char *p = skb_mac_header(skb);
-			int i;
-
-			if (skb->dev->type == ARPHRD_SIT &&
-			    (p -= ETH_HLEN) < skb->head)
-				p = NULL;
-
-			if (p != NULL) {
-				printk("%02x", *p++);
-				for (i = 1; i < len; i++)
-					printk(":%02x", p[i]);
-			}
-			printk(" ");
 
-			if (skb->dev->type == ARPHRD_SIT) {
-				const struct iphdr *iph =
-					(struct iphdr *)skb_mac_header(skb);
-				printk("TUNNEL=%pI4->%pI4 ",
-				       &iph->saddr, &iph->daddr);
-			}
-		} else
-			printk(" ");
-	}
+	/* MAC logging for input path only. */
+	if (in && !out)
+		dump_mac_header(loginfo, skb);
 
 	dump_packet(loginfo, skb, skb_network_offset(skb), 1);
 	printk("\n");
-- 
cgit v1.2.3-58-ga151


From 7a9b2d59507d85569b8a456688ef40cf2ac73e48 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Jun 2010 00:52:37 +0000
Subject: net: use this_cpu_ptr()

use this_cpu_ptr(p) instead of per_cpu_ptr(p, smp_processor_id())

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow.c     | 4 ++--
 net/ipv4/ip_input.c | 2 +-
 net/ipv4/tcp.c      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/flow.c b/net/core/flow.c
index 161900674009..8c7c91a32f18 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -222,7 +222,7 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 	unsigned int hash;
 
 	local_bh_disable();
-	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+	fcp = this_cpu_ptr(fc->percpu);
 
 	fle = NULL;
 	flo = NULL;
@@ -302,7 +302,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
 	LIST_HEAD(gc_list);
 	int i, deleted = 0;
 
-	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+	fcp = this_cpu_ptr(fc->percpu);
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		hlist_for_each_entry_safe(fle, entry, tmp,
 					  &fcp->hash_table[i], u.hlist) {
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index db47a5a00ed2..d859bcc26cb7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -342,7 +342,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
 
 #ifdef CONFIG_NET_CLS_ROUTE
 	if (unlikely(skb_dst(skb)->tclassid)) {
-		struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id());
+		struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct);
 		u32 idx = skb_dst(skb)->tclassid;
 		st[idx&0xFF].o_packets++;
 		st[idx&0xFF].o_bytes += skb->len;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d5f84bd5a455..4e6ddfbab09e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2962,7 +2962,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
 	spin_unlock(&tcp_md5sig_pool_lock);
 
 	if (p)
-		return *per_cpu_ptr(p, smp_processor_id());
+		return *this_cpu_ptr(p);
 
 	local_bh_enable();
 	return NULL;
-- 
cgit v1.2.3-58-ga151


From 9618e2ffd78aaa43a5815e1dd456b4dd95f9e53b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Jun 2010 00:55:06 +0000
Subject: vlan: 64 bit rx counters

Use u64_stats_sync infrastructure to implement 64bit rx stats.

(tx stats are addressed later)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.h      | 13 ++++++++-----
 net/8021q/vlan_core.c |  7 ++++---
 net/8021q/vlan_dev.c  | 46 +++++++++++++++++++++++++++++-----------------
 3 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 6abdcac1b2e8..8d9503ad01da 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -2,6 +2,7 @@
 #define __BEN_VLAN_802_1Q_INC__
 
 #include <linux/if_vlan.h>
+#include <linux/u64_stats_sync.h>
 
 
 /**
@@ -21,14 +22,16 @@ struct vlan_priority_tci_mapping {
  *	struct vlan_rx_stats - VLAN percpu rx stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
- *	@multicast: number of received multicast packets
+ *	@rx_multicast: number of received multicast packets
+ *	@syncp: synchronization point for 64bit counters
  *	@rx_errors: number of errors
  */
 struct vlan_rx_stats {
-	unsigned long rx_packets;
-	unsigned long rx_bytes;
-	unsigned long multicast;
-	unsigned long rx_errors;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	struct u64_stats_sync	syncp;
+	unsigned long		rx_errors;
 };
 
 /**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 50f58f5f1c34..1b9406a31f0c 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -41,9 +41,9 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 	skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats,
-			       smp_processor_id());
+	rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
 
+	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
 	rx_stats->rx_bytes += skb->len;
 
@@ -51,7 +51,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 	case PACKET_BROADCAST:
 		break;
 	case PACKET_MULTICAST:
-		rx_stats->multicast++;
+		rx_stats->rx_multicast++;
 		break;
 	case PACKET_OTHERHOST:
 		/* Our lower layer thinks this is not local, let's make sure.
@@ -62,6 +62,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 			skb->pkt_type = PACKET_HOST;
 		break;
 	}
+	u64_stats_update_end(&rx_stats->syncp);
 	return 0;
 }
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 529842677817..c6456cb842fa 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -166,6 +166,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 
 	rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
 			       smp_processor_id());
+	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
 	rx_stats->rx_bytes += skb->len;
 
@@ -182,7 +183,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		break;
 
 	case PACKET_MULTICAST:
-		rx_stats->multicast++;
+		rx_stats->rx_multicast++;
 		break;
 
 	case PACKET_OTHERHOST:
@@ -197,6 +198,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	default:
 		break;
 	}
+	u64_stats_update_end(&rx_stats->syncp);
 
 	vlan_set_encap_proto(skb, vhdr);
 
@@ -801,27 +803,37 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 	return dev_ethtool_get_flags(vlan->real_dev);
 }
 
-static struct net_device_stats *vlan_dev_get_stats(struct net_device *dev)
+static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev)
 {
-	struct net_device_stats *stats = &dev->stats;
+	struct rtnl_link_stats64 *stats = &dev->stats64;
 
-	dev_txq_stats_fold(dev, stats);
+	dev_txq_stats_fold(dev, &dev->stats);
 
 	if (vlan_dev_info(dev)->vlan_rx_stats) {
-		struct vlan_rx_stats *p, rx = {0};
+		struct vlan_rx_stats *p, accum = {0};
 		int i;
 
 		for_each_possible_cpu(i) {
+			u64 rxpackets, rxbytes, rxmulticast;
+			unsigned int start;
+
 			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats, i);
-			rx.rx_packets += p->rx_packets;
-			rx.rx_bytes   += p->rx_bytes;
-			rx.rx_errors  += p->rx_errors;
-			rx.multicast  += p->multicast;
+			do {
+				start = u64_stats_fetch_begin_bh(&p->syncp);
+				rxpackets	= p->rx_packets;
+				rxbytes		= p->rx_bytes;
+				rxmulticast	= p->rx_multicast;
+			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+			accum.rx_packets += rxpackets;
+			accum.rx_bytes   += rxbytes;
+			accum.rx_multicast += rxmulticast;
+			/* rx_errors is an ulong, not protected by syncp */
+			accum.rx_errors  += p->rx_errors;
 		}
-		stats->rx_packets = rx.rx_packets;
-		stats->rx_bytes   = rx.rx_bytes;
-		stats->rx_errors  = rx.rx_errors;
-		stats->multicast  = rx.multicast;
+		stats->rx_packets = accum.rx_packets;
+		stats->rx_bytes   = accum.rx_bytes;
+		stats->rx_errors  = accum.rx_errors;
+		stats->multicast  = accum.rx_multicast;
 	}
 	return stats;
 }
@@ -848,7 +860,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
 	.ndo_do_ioctl		= vlan_dev_ioctl,
 	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats		= vlan_dev_get_stats,
+	.ndo_get_stats64	= vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
 	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
@@ -872,7 +884,7 @@ static const struct net_device_ops vlan_netdev_accel_ops = {
 	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
 	.ndo_do_ioctl		= vlan_dev_ioctl,
 	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats		= vlan_dev_get_stats,
+	.ndo_get_stats64	= vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
 	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
@@ -897,7 +909,7 @@ static const struct net_device_ops vlan_netdev_ops_sq = {
 	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
 	.ndo_do_ioctl		= vlan_dev_ioctl,
 	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats		= vlan_dev_get_stats,
+	.ndo_get_stats64	= vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
 	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
@@ -922,7 +934,7 @@ static const struct net_device_ops vlan_netdev_accel_ops_sq = {
 	.ndo_change_rx_flags	= vlan_dev_change_rx_flags,
 	.ndo_do_ioctl		= vlan_dev_ioctl,
 	.ndo_neigh_setup	= vlan_dev_neigh_setup,
-	.ndo_get_stats		= vlan_dev_get_stats,
+	.ndo_get_stats64	= vlan_dev_get_stats64,
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	.ndo_fcoe_ddp_setup	= vlan_dev_fcoe_ddp_setup,
 	.ndo_fcoe_ddp_done	= vlan_dev_fcoe_ddp_done,
-- 
cgit v1.2.3-58-ga151


From c4ead4c595cd54cf7b1a184d4f888ce0d7cce7d4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 24 Jun 2010 01:00:22 +0000
Subject: tcp: tso_fragment() might avoid GFP_ATOMIC

We can pass a gfp argument to tso_fragment() and avoid GFP_ATOMIC
allocations sometimes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 51d316dbb058..25ff62e35a68 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1460,7 +1460,7 @@ int tcp_may_send_now(struct sock *sk)
  * packet has never been sent out before (and thus is not cloned).
  */
 static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
-			unsigned int mss_now)
+			unsigned int mss_now, gfp_t gfp)
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
@@ -1470,7 +1470,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now);
 
-	buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, 0, gfp);
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
@@ -1768,7 +1768,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						    cwnd_quota);
 
 		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
+		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-- 
cgit v1.2.3-58-ga151


From 210d6de78c5d7c785fc532556cea340e517955e1 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 24 Jun 2010 16:25:12 +0000
Subject: act_mirred: don't clone skb when skb isn't shared

don't clone skb when skb isn't shared

When the tcf_action is TC_ACT_STOLEN, and the skb isn't shared, we don't need
to clone a new skb. As the skb will be freed after this function returns, we
can use it freely once we get a reference to it.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/sch_generic.h |   11 +++++++++--
 net/sched/act_mirred.c    |    6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 11 +++++++++--
 net/sched/act_mirred.c    |  6 +++---
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index b35301b0c7b6..977ec06ed0c7 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -594,9 +594,16 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
 }
 
 #ifdef CONFIG_NET_CLS_ACT
-static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask)
+static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
+					    int action)
 {
-	struct sk_buff *n = skb_clone(skb, gfp_mask);
+	struct sk_buff *n;
+
+	if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) &&
+	    !skb_shared(skb))
+		n = skb_get(skb);
+	else
+		n = skb_clone(skb, gfp_mask);
 
 	if (n) {
 		n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c0b6863e3b87..2e9a7b91aa10 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -169,13 +169,13 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 		goto out;
 	}
 
-	skb2 = skb_act_clone(skb, GFP_ATOMIC);
+	at = G_TC_AT(skb->tc_verd);
+	skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
 	if (skb2 == NULL)
 		goto out;
 
 	m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
 	m->tcf_bstats.packets++;
-	at = G_TC_AT(skb->tc_verd);
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
 			skb_push(skb2, skb2->dev->hard_header_len);
@@ -185,8 +185,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 	if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
-	skb2->dev = dev;
 	skb2->skb_iif = skb->dev->ifindex;
+	skb2->dev = dev;
 	dev_queue_xmit(skb2);
 	err = 0;
 
-- 
cgit v1.2.3-58-ga151


From 01eebb53a65996dfbfb892bf5eb21ae831fbe3a6 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Sat, 26 Jun 2010 11:31:28 +0000
Subject: caif: Kconfig and Makefile fixes

Use "depends on" instead of "if" in Kconfig files.
Fixed CAIF debug flag, and removed unnecessary clean-* options.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/Kconfig  |  5 +----
 drivers/net/caif/Makefile | 10 ++--------
 net/caif/Kconfig          |  7 ++-----
 net/caif/Makefile         | 14 ++------------
 4 files changed, 7 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 0b28e0107697..6f33ee453f41 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -2,16 +2,13 @@
 # CAIF physical drivers
 #
 
-if CAIF
-
 comment "CAIF transport drivers"
 
 config CAIF_TTY
 	tristate "CAIF TTY transport driver"
+	depends on CAIF
 	default n
 	---help---
 	The CAIF TTY transport driver is a Line Discipline (ldisc)
 	identified as N_CAIF. When this ldisc is opened from user space
 	it will redirect the TTY's traffic into the CAIF stack.
-
-endif # CAIF
diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile
index 52b6d1f826f8..e6d3ca06ecfe 100644
--- a/drivers/net/caif/Makefile
+++ b/drivers/net/caif/Makefile
@@ -1,12 +1,6 @@
-ifeq ($(CONFIG_CAIF_DEBUG),1)
-CAIF_DBG_FLAGS := -DDEBUG
+ifeq ($(CONFIG_CAIF_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
 endif
 
-KBUILD_EXTRA_SYMBOLS=net/caif/Module.symvers
-
-ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS)
-clean-dirs:= .tmp_versions
-clean-files:= Module.symvers modules.order *.cmd *~ \
-
 # Serial interface
 obj-$(CONFIG_CAIF_TTY) += caif_serial.o
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index ed651786f16b..529750da9624 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -21,19 +21,18 @@ menuconfig CAIF
 	See Documentation/networking/caif for a further explanation on how to
 	use and configure CAIF.
 
-if CAIF
-
 config  CAIF_DEBUG
 	bool "Enable Debug"
+	depends on CAIF
 	default n
 	--- help ---
 	Enable the inclusion of debug code in the CAIF stack.
 	Be aware that doing this will impact performance.
 	If unsure say N.
 
-
 config CAIF_NETDEV
 	tristate "CAIF GPRS Network device"
+	depends on CAIF
 	default CAIF
 	---help---
 	Say Y if you will be using a CAIF based GPRS network device.
@@ -41,5 +40,3 @@ config CAIF_NETDEV
 	If you select to build it as a built-in then the main CAIF device must
 	also be a built-in.
 	If unsure say Y.
-
-endif
diff --git a/net/caif/Makefile b/net/caif/Makefile
index 34852af2595e..f87481fb0e65 100644
--- a/net/caif/Makefile
+++ b/net/caif/Makefile
@@ -1,23 +1,13 @@
-ifeq ($(CONFIG_CAIF_DEBUG),1)
-CAIF_DBG_FLAGS := -DDEBUG
+ifeq ($(CONFIG_CAIF_DEBUG),y)
+EXTRA_CFLAGS += -DDEBUG
 endif
 
-ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS)
-
 caif-objs := caif_dev.o \
 	cfcnfg.o cfmuxl.o cfctrl.o  \
 	cffrml.o cfveil.o cfdbgl.o\
 	cfserl.o cfdgml.o  \
 	cfrfml.o cfvidl.o cfutill.o \
 	cfsrvl.o cfpkt_skbuff.o caif_config_util.o
-clean-dirs:= .tmp_versions
-
-clean-files:= \
-	Module.symvers \
-	modules.order \
-	*.cmd \
-	*.o \
-	*~
 
 obj-$(CONFIG_CAIF) += caif.o
 obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
-- 
cgit v1.2.3-58-ga151


From db048b69037e7fa6a7d9e95a1271a50dc08ae233 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 28 Jun 2010 08:44:07 +0000
Subject: ethtool: Fix potential kernel buffer overflow in ETHTOOL_GRXCLSRLALL

On a 32-bit machine, info.rule_cnt >= 0x40000000 leads to integer
overflow and the buffer may be smaller than needed.  Since
ETHTOOL_GRXCLSRLALL is unprivileged, this can presumably be used for at
least denial of service.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/ethtool.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a0f4964033d2..a3a7e9a48dff 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -347,8 +347,9 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
 		if (info.rule_cnt > 0) {
-			rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
-					   GFP_USER);
+			if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
+				rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+						   GFP_USER);
 			if (!rule_buf)
 				return -ENOMEM;
 		}
-- 
cgit v1.2.3-58-ga151


From bf988435bd5b53529f4408a8efb1f433f6ddfda9 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Mon, 28 Jun 2010 08:45:58 +0000
Subject: ethtool: Fix potential user buffer overflow for ETHTOOL_{G, S}RXFH

struct ethtool_rxnfc was originally defined in 2.6.27 for the
ETHTOOL_{G,S}RXFH command with only the cmd, flow_type and data
fields.  It was then extended in 2.6.30 to support various additional
commands.  These commands should have been defined to use a new
structure, but it is too late to change that now.

Since user-space may still be using the old structure definition
for the ETHTOOL_{G,S}RXFH commands, and since they do not need the
additional fields, only copy the originally defined fields to and
from user-space.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Cc: stable@kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  2 ++
 net/core/ethtool.c      | 36 +++++++++++++++++++++++++++---------
 2 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 276b40a16835..b4207ca3ad52 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -379,6 +379,8 @@ struct ethtool_rxnfc {
 	__u32				flow_type;
 	/* The rx flow hash value or the rule DB size */
 	__u64				data;
+	/* The following fields are not valid and must not be used for
+	 * the ETHTOOL_{G,X}RXFH commands. */
 	struct ethtool_rx_flow_spec	fs;
 	__u32				rule_cnt;
 	__u32				rule_locs[0];
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a3a7e9a48dff..75e4ffeb8cc9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -318,23 +318,33 @@ out:
 }
 
 static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
-						void __user *useraddr)
+						u32 cmd, void __user *useraddr)
 {
-	struct ethtool_rxnfc cmd;
+	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 
 	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_SRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
+	return dev->ethtool_ops->set_rxnfc(dev, &info);
 }
 
 static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
-						void __user *useraddr)
+						u32 cmd, void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
+	size_t info_size = sizeof(info);
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
 	void *rule_buf = NULL;
@@ -342,7 +352,15 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 	if (!ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&info, useraddr, sizeof(info)))
+	/* struct ethtool_rxnfc was originally defined for
+	 * ETHTOOL_{G,S}RXFH with only the cmd, flow_type and data
+	 * members.  User-space might still be using that
+	 * definition. */
+	if (cmd == ETHTOOL_GRXFH)
+		info_size = (offsetof(struct ethtool_rxnfc, data) +
+			     sizeof(info.data));
+
+	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
 	if (info.cmd == ETHTOOL_GRXCLSRLALL) {
@@ -360,7 +378,7 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
 		goto err_out;
 
 	ret = -EFAULT;
-	if (copy_to_user(useraddr, &info, sizeof(info)))
+	if (copy_to_user(useraddr, &info, info_size))
 		goto err_out;
 
 	if (rule_buf) {
@@ -1517,12 +1535,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
-		rc = ethtool_get_rxnfc(dev, useraddr);
+		rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_SRXFH:
 	case ETHTOOL_SRXCLSRLDEL:
 	case ETHTOOL_SRXCLSRLINS:
-		rc = ethtool_set_rxnfc(dev, useraddr);
+		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
 		break;
 	case ETHTOOL_GGRO:
 		rc = ethtool_get_gro(dev, useraddr);
-- 
cgit v1.2.3-58-ga151


From d1e316891693f3e7266a1df9afda72763248ef7c Mon Sep 17 00:00:00 2001
From: Nicolas Kaiser <nikai@nikai.net>
Date: Sun, 27 Jun 2010 00:00:25 +0000
Subject: net/Makefile: conditionally descend to wireless and ieee802154

Don't descend to wireless and ieee802154 unless they are actually used.

Signed-off-by: Nicolas Kaiser <nikai@nikai.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/Makefile b/net/Makefile
index cb7bdc1210cb..41d420070a38 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -50,7 +50,7 @@ endif
 obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
 obj-$(CONFIG_RDS)		+= rds/
-obj-y				+= wireless/
+obj-$(CONFIG_WIRELESS)		+= wireless/
 obj-$(CONFIG_MAC80211)		+= mac80211/
 obj-$(CONFIG_TIPC)		+= tipc/
 obj-$(CONFIG_NETLABEL)		+= netlabel/
@@ -61,7 +61,7 @@ obj-$(CONFIG_CAIF)		+= caif/
 ifneq ($(CONFIG_DCB),)
 obj-y				+= dcb/
 endif
-obj-y				+= ieee802154/
+obj-$(CONFIG_IEEE802154)	+= ieee802154/
 
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
-- 
cgit v1.2.3-58-ga151


From f56619fc72407561b00c52244a2caa53d730bc4a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 26 Jun 2010 11:37:47 +0000
Subject: ipv6: Clamp reported valid_lft to a minimum of 0

Since addresses are only revalidated every 2 minutes, the reported
valid_lft can underflow shortly before the address is deleted.
Clamp it to a minimum of 0, as for prefered_lft.

Reported-by: Piotr Lewandowski <piotr.lewandowski@gmail.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c20a7c260a8f..51a273fe3d21 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3495,8 +3495,12 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 				preferred -= tval;
 			else
 				preferred = 0;
-			if (valid != INFINITY_LIFE_TIME)
-				valid -= tval;
+			if (valid != INFINITY_LIFE_TIME) {
+				if (valid > tval)
+					valid -= tval;
+				else
+					valid = 0;
+			}
 		}
 	} else {
 		preferred = INFINITY_LIFE_TIME;
-- 
cgit v1.2.3-58-ga151


From 784e2710ce3588d8316dc8efac9ecbebaeaf7c35 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Sat, 26 Jun 2010 11:42:55 +0000
Subject: ipv6: Use interface max_desync_factor instead of static default

max_desync_factor can be configured per-interface, but nothing is
using the value.

Reported-by: Piotr Lewandowski <piotr.lewandowski@gmail.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 51a273fe3d21..2514adf5251e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -121,8 +121,6 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
 static int __ipv6_regen_rndid(struct inet6_dev *idev);
 static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
 static void ipv6_regen_rndid(unsigned long data);
-
-static int desync_factor = MAX_DESYNC_FACTOR * HZ;
 #endif
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
@@ -893,7 +891,8 @@ retry:
 			      idev->cnf.temp_valid_lft);
 	tmp_prefered_lft = min_t(__u32,
 				 ifp->prefered_lft,
-				 idev->cnf.temp_prefered_lft - desync_factor / HZ);
+				 idev->cnf.temp_prefered_lft -
+				 idev->cnf.max_desync_factor);
 	tmp_plen = ifp->prefix_len;
 	max_addresses = idev->cnf.max_addresses;
 	tmp_cstamp = ifp->cstamp;
@@ -1653,7 +1652,8 @@ static void ipv6_regen_rndid(unsigned long data)
 
 	expires = jiffies +
 		idev->cnf.temp_prefered_lft * HZ -
-		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
+		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time -
+		idev->cnf.max_desync_factor * HZ;
 	if (time_before(expires, jiffies)) {
 		printk(KERN_WARNING
 			"ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
-- 
cgit v1.2.3-58-ga151


From 70777d03466e7a8a41b0d34677454c92f4e93d89 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 30 Jun 2010 10:39:19 -0700
Subject: net/core: use ntohs for skb->protocol

This is only noticed by people that are not doing everything correct in
the first place.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7f390b52caab..e85cc5fa3c4e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1537,7 +1537,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 				if (net_ratelimit())
 					printk(KERN_CRIT "protocol %04x is "
 					       "buggy, dev %s\n",
-					       skb2->protocol, dev->name);
+					       ntohs(skb2->protocol),
+					       dev->name);
 				skb_reset_network_header(skb2);
 			}
 
-- 
cgit v1.2.3-58-ga151


From 5acbf7f10b9e336510a1de79b4af06f6da9a8c5a Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 29 Jun 2010 22:54:58 +0000
Subject: act_mirred: combine duplicate code

act_mirred: combine duplicate code

tcf_bstats is updated in any way, so we can do it earlier to reduce the size of
the code.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Jamal Hadi Salim <hadi@cyberus.ca>
----
 net/sched/act_mirred.c |    6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_mirred.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 2e9a7b91aa10..a16b0175f890 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -160,6 +160,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 
 	spin_lock(&m->tcf_lock);
 	m->tcf_tm.lastuse = jiffies;
+	m->tcf_bstats.bytes += qdisc_pkt_len(skb);
+	m->tcf_bstats.packets++;
 
 	dev = m->tcfm_dev;
 	if (!(dev->flags & IFF_UP)) {
@@ -174,8 +176,6 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 	if (skb2 == NULL)
 		goto out;
 
-	m->tcf_bstats.bytes += qdisc_pkt_len(skb2);
-	m->tcf_bstats.packets++;
 	if (!(at & AT_EGRESS)) {
 		if (m->tcfm_ok_push)
 			skb_push(skb2, skb2->dev->hard_header_len);
@@ -193,8 +193,6 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 out:
 	if (err) {
 		m->tcf_qstats.overlimits++;
-		m->tcf_bstats.bytes += qdisc_pkt_len(skb);
-		m->tcf_bstats.packets++;
 		/* should we be asking for packet to be dropped?
 		 * may make sense for redirect case only
 		 */
-- 
cgit v1.2.3-58-ga151


From 504f85c9d05f7c605306e808f0d835fe11bfd18d Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 29 Jun 2010 23:07:09 +0000
Subject: act_nat: use stack variable

act_nat: use stack variable

structure tc_nat isn't too big for stack, so we can put it in stack.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/sched/act_nat.c |   31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_nat.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 570949417f38..0be49a4b4d8c 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -265,40 +265,29 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_nat *p = a->priv;
-	struct tc_nat *opt;
+	struct tc_nat opt;
 	struct tcf_t t;
-	int s;
 
-	s = sizeof(*opt);
+	opt.old_addr = p->old_addr;
+	opt.new_addr = p->new_addr;
+	opt.mask = p->mask;
+	opt.flags = p->flags;
 
-	/* netlink spinlocks held above us - must use ATOMIC */
-	opt = kzalloc(s, GFP_ATOMIC);
-	if (unlikely(!opt))
-		return -ENOBUFS;
+	opt.index = p->tcf_index;
+	opt.action = p->tcf_action;
+	opt.refcnt = p->tcf_refcnt - ref;
+	opt.bindcnt = p->tcf_bindcnt - bind;
 
-	opt->old_addr = p->old_addr;
-	opt->new_addr = p->new_addr;
-	opt->mask = p->mask;
-	opt->flags = p->flags;
-
-	opt->index = p->tcf_index;
-	opt->action = p->tcf_action;
-	opt->refcnt = p->tcf_refcnt - ref;
-	opt->bindcnt = p->tcf_bindcnt - bind;
-
-	NLA_PUT(skb, TCA_NAT_PARMS, s, opt);
+	NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt);
 	t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
 	t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
 	t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
 	NLA_PUT(skb, TCA_NAT_TM, sizeof(t), &t);
 
-	kfree(opt);
-
 	return skb->len;
 
 nla_put_failure:
 	nlmsg_trim(skb, b);
-	kfree(opt);
 	return -1;
 }
 
-- 
cgit v1.2.3-58-ga151


From 4ce3c183fcade7f4b30a33dae90cd774c3d9e094 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 30 Jun 2010 13:31:19 -0700
Subject: snmp: 64bit ipstats_mib for all arches

/proc/net/snmp and /proc/net/netstat expose SNMP counters.

Width of these counters is either 32 or 64 bits, depending on the size
of "unsigned long" in kernel.

This means user program parsing these files must already be prepared to
deal with 64bit values, regardless of user program being 32 or 64 bit.

This patch introduces 64bit snmp values for IPSTAT mib, where some
counters can wrap pretty fast if they are 32bit wide.

# netstat -s|egrep "InOctets|OutOctets"
    InOctets: 244068329096
    OutOctets: 244069348848

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h    | 20 +++++++++-----
 include/net/ipv6.h  | 12 ++++-----
 include/net/snmp.h  | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 net/ipv4/af_inet.c  | 36 +++++++++++++++++++++++++
 net/ipv4/proc.c     | 15 ++++++-----
 net/ipv6/addrconf.c | 18 ++++++++++++-
 net/ipv6/proc.c     | 17 +++++++++---
 7 files changed, 167 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index 3b524df7dddb..890f9725d681 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,12 +165,12 @@ struct ipv4_config {
 };
 
 extern struct ipv4_config ipv4_config;
-#define IP_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.ip_statistics, field)
-#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.ip_statistics, field)
-#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.ip_statistics, field, val)
-#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS_BH((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS((net)->mib.ip_statistics, field, val)
-#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS_BH((net)->mib.ip_statistics, field, val)
+#define IP_INC_STATS(net, field)	SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+#define IP_INC_STATS_BH(net, field)	SNMP_INC_STATS64_BH((net)->mib.ip_statistics, field)
+#define IP_ADD_STATS(net, field, val)	SNMP_ADD_STATS64((net)->mib.ip_statistics, field, val)
+#define IP_ADD_STATS_BH(net, field, val) SNMP_ADD_STATS64_BH((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS(net, field, val) SNMP_UPD_PO_STATS64((net)->mib.ip_statistics, field, val)
+#define IP_UPD_PO_STATS_BH(net, field, val) SNMP_UPD_PO_STATS64_BH((net)->mib.ip_statistics, field, val)
 #define NET_INC_STATS(net, field)	SNMP_INC_STATS((net)->mib.net_statistics, field)
 #define NET_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.net_statistics, field)
 #define NET_INC_STATS_USER(net, field) 	SNMP_INC_STATS_USER((net)->mib.net_statistics, field)
@@ -178,6 +178,14 @@ extern struct ipv4_config ipv4_config;
 #define NET_ADD_STATS_USER(net, field, adnd) SNMP_ADD_STATS_USER((net)->mib.net_statistics, field, adnd)
 
 extern unsigned long snmp_fold_field(void __percpu *mib[], int offt);
+#if BITS_PER_LONG==32
+extern u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t sync_off);
+#else
+static inline u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_off)
+{
+	return snmp_fold_field(mib, offt);
+}
+#endif
 extern int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align);
 extern void snmp_mib_free(void __percpu *ptr[2]);
 
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index f5808d596aab..1f8412410998 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -136,17 +136,17 @@ extern struct ctl_path net_ipv6_ctl_path[];
 /* MIBs */
 
 #define IP6_INC_STATS(net, idev,field)		\
-		_DEVINC(net, ipv6, , idev, field)
+		_DEVINC(net, ipv6, 64, idev, field)
 #define IP6_INC_STATS_BH(net, idev,field)	\
-		_DEVINC(net, ipv6, _BH, idev, field)
+		_DEVINC(net, ipv6, 64_BH, idev, field)
 #define IP6_ADD_STATS(net, idev,field,val)	\
-		_DEVADD(net, ipv6, , idev, field, val)
+		_DEVADD(net, ipv6, 64, idev, field, val)
 #define IP6_ADD_STATS_BH(net, idev,field,val)	\
-		_DEVADD(net, ipv6, _BH, idev, field, val)
+		_DEVADD(net, ipv6, 64_BH, idev, field, val)
 #define IP6_UPD_PO_STATS(net, idev,field,val)   \
-		_DEVUPD(net, ipv6, , idev, field, val)
+		_DEVUPD(net, ipv6, 64, idev, field, val)
 #define IP6_UPD_PO_STATS_BH(net, idev,field,val)   \
-		_DEVUPD(net, ipv6, _BH, idev, field, val)
+		_DEVUPD(net, ipv6, 64_BH, idev, field, val)
 #define ICMP6_INC_STATS(net, idev, field)	\
 		_DEVINC(net, icmpv6, , idev, field)
 #define ICMP6_INC_STATS_BH(net, idev, field)	\
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 899003d18db9..a0e61806d480 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -47,15 +47,16 @@ struct snmp_mib {
 }
 
 /*
- * We use all unsigned longs. Linux will soon be so reliable that even 
- * these will rapidly get too small 8-). Seriously consider the IpInReceives 
- * count on the 20Gb/s + networks people expect in a few years time!
+ * We use unsigned longs for most mibs but u64 for ipstats.
  */
+#include <linux/u64_stats_sync.h>
 
 /* IPstats */
 #define IPSTATS_MIB_MAX	__IPSTATS_MIB_MAX
 struct ipstats_mib {
-	unsigned long	mibs[IPSTATS_MIB_MAX];
+	/* mibs[] must be first field of struct ipstats_mib */
+	u64		mibs[IPSTATS_MIB_MAX];
+	struct u64_stats_sync syncp;
 };
 
 /* ICMP */
@@ -155,4 +156,70 @@ struct linux_xfrm_mib {
 		ptr->mibs[basefield##PKTS]++; \
 		ptr->mibs[basefield##OCTETS] += addend;\
 	} while (0)
+
+
+#if BITS_PER_LONG==32
+
+#define SNMP_ADD_STATS64_BH(mib, field, addend) 			\
+	do {								\
+		__typeof__(*mib[0]) *ptr = __this_cpu_ptr((mib)[0]);	\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+	} while (0)
+#define SNMP_ADD_STATS64_USER(mib, field, addend) 			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[1]);				\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_ADD_STATS64(mib, field, addend)				\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[field] += addend;				\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_INC_STATS64_BH(mib, field) SNMP_ADD_STATS64_BH(mib, field, 1)
+#define SNMP_INC_STATS64_USER(mib, field) SNMP_ADD_STATS64_USER(mib, field, 1)
+#define SNMP_INC_STATS64(mib, field) SNMP_ADD_STATS64(mib, field, 1)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend)			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		preempt_disable();					\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[basefield##PKTS]++;				\
+		ptr->mibs[basefield##OCTETS] += addend;			\
+		u64_stats_update_end(&ptr->syncp);			\
+		preempt_enable();					\
+	} while (0)
+#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend)			\
+	do {								\
+		__typeof__(*mib[0]) *ptr;				\
+		ptr = __this_cpu_ptr((mib)[!in_softirq()]);		\
+		u64_stats_update_begin(&ptr->syncp);			\
+		ptr->mibs[basefield##PKTS]++;				\
+		ptr->mibs[basefield##OCTETS] += addend;			\
+		u64_stats_update_end(&ptr->syncp);			\
+	} while (0)
+#else
+#define SNMP_INC_STATS64_BH(mib, field)		SNMP_INC_STATS_BH(mib, field)
+#define SNMP_INC_STATS64_USER(mib, field)	SNMP_INC_STATS_USER(mib, field)
+#define SNMP_INC_STATS64(mib, field)		SNMP_INC_STATS(mib, field)
+#define SNMP_DEC_STATS64(mib, field)		SNMP_DEC_STATS(mib, field)
+#define SNMP_ADD_STATS64_BH(mib, field, addend) SNMP_ADD_STATS_BH(mib, field, addend)
+#define SNMP_ADD_STATS64_USER(mib, field, addend) SNMP_ADD_STATS_USER(mib, field, addend)
+#define SNMP_ADD_STATS64(mib, field, addend)	SNMP_ADD_STATS(mib, field, addend)
+#define SNMP_UPD_PO_STATS64(mib, basefield, addend) SNMP_UPD_PO_STATS(mib, basefield, addend)
+#define SNMP_UPD_PO_STATS64_BH(mib, basefield, addend) SNMP_UPD_PO_STATS_BH(mib, basefield, addend)
+#endif
+
 #endif
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 640db9b90330..3ceb025b16f2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1427,6 +1427,42 @@ unsigned long snmp_fold_field(void __percpu *mib[], int offt)
 }
 EXPORT_SYMBOL_GPL(snmp_fold_field);
 
+#if BITS_PER_LONG==32
+
+u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
+{
+	u64 res = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *bhptr, *userptr;
+		struct u64_stats_sync *syncp;
+		u64 v_bh, v_user;
+		unsigned int start;
+
+		/* first mib used by softirq context, we must use _bh() accessors */
+		bhptr = per_cpu_ptr(SNMP_STAT_BHPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin_bh(syncp);
+			v_bh = *(((u64 *) bhptr) + offt);
+		} while (u64_stats_fetch_retry_bh(syncp, start));
+
+		/* second mib used in USER context */
+		userptr = per_cpu_ptr(SNMP_STAT_USRPTR(mib), cpu);
+		syncp = (struct u64_stats_sync *)(userptr + syncp_offset);
+		do {
+			start = u64_stats_fetch_begin(syncp);
+			v_user = *(((u64 *) userptr) + offt);
+		} while (u64_stats_fetch_retry(syncp, start));
+
+		res += v_bh + v_user;
+	}
+	return res;
+}
+EXPORT_SYMBOL_GPL(snmp_fold_field64);
+#endif
+
 int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
 {
 	BUG_ON(ptr == NULL);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e320ca6b3ef3..4ae1f203f7cb 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -343,10 +343,12 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
 		   IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
 		   sysctl_ip_default_ttl);
 
+	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));
 
 	icmp_put(seq);	/* RFC 2011 compatibility */
 	icmpmsg_put(seq);
@@ -432,9 +434,10 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
 
 	seq_puts(seq, "\nIpExt:");
 	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field((void __percpu **)net->mib.ip_statistics,
-					   snmp4_ipextstats_list[i].entry));
+		seq_printf(seq, " %llu",
+			   snmp_fold_field64((void __percpu **)net->mib.ip_statistics,
+					     snmp4_ipextstats_list[i].entry,
+					     offsetof(struct ipstats_mib, syncp)));
 
 	seq_putc(seq, '\n');
 	return 0;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2514adf5251e..e81155d2f251 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3862,12 +3862,28 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
 	memset(&stats[items], 0, pad);
 }
 
+static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib,
+				      int items, int bytes, size_t syncpoff)
+{
+	int i;
+	int pad = bytes - sizeof(u64) * items;
+	BUG_ON(pad < 0);
+
+	/* Use put_unaligned() because stats may not be aligned for u64. */
+	put_unaligned(items, &stats[0]);
+	for (i = 1; i < items; i++)
+		put_unaligned(snmp_fold_field64(mib, i, syncpoff), &stats[i]);
+
+	memset(&stats[items], 0, pad);
+}
+
 static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 			     int bytes)
 {
 	switch (attrtype) {
 	case IFLA_INET6_STATS:
-		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+		__snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6,
+				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:
 		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 566798d69f37..d082eaeefa25 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -174,17 +174,28 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
 				const struct snmp_mib *itemlist)
 {
 	int i;
-	for (i=0; itemlist[i].name; i++)
+
+	for (i = 0; itemlist[i].name; i++)
 		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
 			   snmp_fold_field(mib, itemlist[i].entry));
 }
 
+static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib,
+				  const struct snmp_mib *itemlist, size_t syncpoff)
+{
+	int i;
+
+	for (i = 0; itemlist[i].name; i++)
+		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
+			   snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+}
+
 static int snmp6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = (struct net *)seq->private;
 
-	snmp6_seq_show_item(seq, (void __percpu **)net->mib.ipv6_statistics,
-			    snmp6_ipstats_list);
+	snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics,
+			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));
 	snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics,
 			    snmp6_icmp6_list);
 	snmp6_seq_show_icmpv6msg(seq,
-- 
cgit v1.2.3-58-ga151


From d6bebca92c663fb216c072193945946f3807ca7f Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 29 Jun 2010 04:39:37 +0000
Subject: fragment: add fast path for in-order fragments

add fast path for in-order fragments

As the fragments are sent in order in most of OSes, such as Windows, Darwin and
FreeBSD, it is likely the new fragments are at the end of the inet_frag_queue.
In the fast path, we check if the skb at the end of the inet_frag_queue is the
prev we expect.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_frag.h |    1 +
 net/ipv4/ip_fragment.c  |   12 ++++++++++++
 net/ipv6/reassembly.c   |   11 +++++++++++
 3 files changed, 24 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h |  1 +
 net/ipv4/ip_fragment.c  | 12 ++++++++++++
 net/ipv6/reassembly.c   | 11 +++++++++++
 3 files changed, 24 insertions(+)

(limited to 'net')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 39f2dc943908..16ff29a7bb30 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -20,6 +20,7 @@ struct inet_frag_queue {
 	atomic_t		refcnt;
 	struct timer_list	timer;      /* when will this queue expire? */
 	struct sk_buff		*fragments; /* list of received fragments */
+	struct sk_buff		*fragments_tail;
 	ktime_t			stamp;
 	int			len;        /* total length of orig datagram */
 	int			meat;
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 858d34648eee..dd0dbf0c6b7f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -314,6 +314,7 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.len = 0;
 	qp->q.meat = 0;
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	qp->iif = 0;
 
 	return 0;
@@ -386,6 +387,11 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = qp->q.fragments_tail;
+	if (!prev || FRAG_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
@@ -393,6 +399,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -451,6 +458,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		qp->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -504,6 +513,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 			goto out_nomem;
 
 		fp->next = head->next;
+		if (!fp->next)
+			qp->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, qp->q.fragments);
@@ -574,6 +585,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
 	qp->q.fragments = NULL;
+	qp->q.fragments_tail = NULL;
 	return 0;
 
 out_nomem:
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 0b97230a3251..545c4141b755 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -333,6 +333,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	 * in the chain of fragments so far.  We must know where to put
 	 * this fragment, right?
 	 */
+	prev = fq->q.fragments_tail;
+	if (!prev || FRAG6_CB(prev)->offset < offset) {
+		next = NULL;
+		goto found;
+	}
 	prev = NULL;
 	for(next = fq->q.fragments; next != NULL; next = next->next) {
 		if (FRAG6_CB(next)->offset >= offset)
@@ -340,6 +345,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 		prev = next;
 	}
 
+found:
 	/* We found where to put this one.  Check for overlap with
 	 * preceding fragment, and, if needed, align things so that
 	 * any overlaps are eliminated.
@@ -397,6 +403,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 
 	/* Insert this fragment in the chain of fragments. */
 	skb->next = next;
+	if (!next)
+		fq->q.fragments_tail = skb;
 	if (prev)
 		prev->next = skb;
 	else
@@ -463,6 +471,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 			goto out_oom;
 
 		fp->next = head->next;
+		if (!fp->next)
+			fq->q.fragments_tail = fp;
 		prev->next = fp;
 
 		skb_morph(head, fq->q.fragments);
@@ -549,6 +559,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
 	fq->q.fragments = NULL;
+	fq->q.fragments_tail = NULL;
 	return 1;
 
 out_oversize:
-- 
cgit v1.2.3-58-ga151


From 1437ce3983bcbc0447a0dedcd644c14fe833d266 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 30 Jun 2010 02:44:32 +0000
Subject: ethtool: Change ethtool_op_set_flags to validate flags

ethtool_op_set_flags() does not check for unsupported flags, and has
no way of doing so.  This means it is not suitable for use as a
default implementation of ethtool_ops::set_flags.

Add a 'supported' parameter specifying the flags that the driver and
hardware support, validate the requested flags against this, and
change all current callers to pass this parameter.

Change some other trivial implementations of ethtool_ops::set_flags to
call ethtool_op_set_flags().

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Reviewed-by: Stanislaw Gruszka <sgruszka@redhat.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/cxgb4/cxgb4_main.c    |  9 +--------
 drivers/net/enic/enic_main.c      |  1 -
 drivers/net/ixgbe/ixgbe_ethtool.c |  5 ++++-
 drivers/net/mv643xx_eth.c         |  7 ++++++-
 drivers/net/myri10ge/myri10ge.c   | 10 +++++++---
 drivers/net/niu.c                 |  9 +--------
 drivers/net/sfc/ethtool.c         |  5 +----
 drivers/net/sky2.c                | 16 ++++++----------
 include/linux/ethtool.h           |  2 +-
 net/core/ethtool.c                | 28 +++++-----------------------
 10 files changed, 32 insertions(+), 60 deletions(-)

(limited to 'net')

diff --git a/drivers/net/cxgb4/cxgb4_main.c b/drivers/net/cxgb4/cxgb4_main.c
index 65281674de91..55a720e4abdc 100644
--- a/drivers/net/cxgb4/cxgb4_main.c
+++ b/drivers/net/cxgb4/cxgb4_main.c
@@ -1799,14 +1799,7 @@ static int set_tso(struct net_device *dev, u32 value)
 
 static int set_flags(struct net_device *dev, u32 flags)
 {
-	if (flags & ~ETH_FLAG_RXHASH)
-		return -EOPNOTSUPP;
-
-	if (flags & ETH_FLAG_RXHASH)
-		dev->features |= NETIF_F_RXHASH;
-	else
-		dev->features &= ~NETIF_F_RXHASH;
-	return 0;
+	return ethtool_op_set_flags(dev, flags, ETH_FLAG_RXHASH);
 }
 
 static struct ethtool_ops cxgb_ethtool_ops = {
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 6c6795b90fa6..77a7f87d498e 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -365,7 +365,6 @@ static const struct ethtool_ops enic_ethtool_ops = {
 	.get_coalesce = enic_get_coalesce,
 	.set_coalesce = enic_set_coalesce,
 	.get_flags = ethtool_op_get_flags,
-	.set_flags = ethtool_op_set_flags,
 };
 
 static void enic_free_wq_buf(struct vnic_wq *wq, struct vnic_wq_buf *buf)
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index 873b45efca40..7d2e5ea2deba 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -2205,8 +2205,11 @@ static int ixgbe_set_flags(struct net_device *netdev, u32 data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	bool need_reset = false;
+	int rc;
 
-	ethtool_op_set_flags(netdev, data);
+	rc = ethtool_op_set_flags(netdev, data, ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
+	if (rc)
+		return rc;
 
 	/* if state changes we need to update adapter->flags and reset */
 	if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) {
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index e345ec8cb473..82b720f29c75 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1636,6 +1636,11 @@ static void mv643xx_eth_get_ethtool_stats(struct net_device *dev,
 	}
 }
 
+static int mv643xx_eth_set_flags(struct net_device *dev, u32 data)
+{
+	return ethtool_op_set_flags(dev, data, ETH_FLAG_LRO);
+}
+
 static int mv643xx_eth_get_sset_count(struct net_device *dev, int sset)
 {
 	if (sset == ETH_SS_STATS)
@@ -1661,7 +1666,7 @@ static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
 	.get_strings		= mv643xx_eth_get_strings,
 	.get_ethtool_stats	= mv643xx_eth_get_ethtool_stats,
 	.get_flags		= ethtool_op_get_flags,
-	.set_flags		= ethtool_op_set_flags,
+	.set_flags		= mv643xx_eth_set_flags,
 	.get_sset_count		= mv643xx_eth_get_sset_count,
 };
 
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e0b47cc8a86e..d771d1650d60 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -1730,8 +1730,7 @@ static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled)
 	if (csum_enabled)
 		mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
 	else {
-		u32 flags = ethtool_op_get_flags(netdev);
-		err = ethtool_op_set_flags(netdev, (flags & ~ETH_FLAG_LRO));
+		netdev->features &= ~NETIF_F_LRO;
 		mgp->csum_flag = 0;
 
 	}
@@ -1900,6 +1899,11 @@ static u32 myri10ge_get_msglevel(struct net_device *netdev)
 	return mgp->msg_enable;
 }
 
+static int myri10ge_set_flags(struct net_device *netdev, u32 value)
+{
+	return ethtool_op_set_flags(netdev, value, ETH_FLAG_LRO);
+}
+
 static const struct ethtool_ops myri10ge_ethtool_ops = {
 	.get_settings = myri10ge_get_settings,
 	.get_drvinfo = myri10ge_get_drvinfo,
@@ -1920,7 +1924,7 @@ static const struct ethtool_ops myri10ge_ethtool_ops = {
 	.set_msglevel = myri10ge_set_msglevel,
 	.get_msglevel = myri10ge_get_msglevel,
 	.get_flags = ethtool_op_get_flags,
-	.set_flags = ethtool_op_set_flags
+	.set_flags = myri10ge_set_flags
 };
 
 static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 63e8e3893bd6..3d523cb7975a 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -7920,14 +7920,7 @@ static int niu_phys_id(struct net_device *dev, u32 data)
 
 static int niu_set_flags(struct net_device *dev, u32 data)
 {
-	if (data & (ETH_FLAG_LRO | ETH_FLAG_NTUPLE))
-		return -EOPNOTSUPP;
-
-	if (data & ETH_FLAG_RXHASH)
-		dev->features |= NETIF_F_RXHASH;
-	else
-		dev->features &= ~NETIF_F_RXHASH;
-	return 0;
+	return ethtool_op_set_flags(dev, data, ETH_FLAG_RXHASH);
 }
 
 static const struct ethtool_ops niu_ethtool_ops = {
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 7693cfbf9cf4..23372bf5cd59 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -551,10 +551,7 @@ static int efx_ethtool_set_flags(struct net_device *net_dev, u32 data)
 	struct efx_nic *efx = netdev_priv(net_dev);
 	u32 supported = efx->type->offload_features & ETH_FLAG_RXHASH;
 
-	if (data & ~supported)
-		return -EOPNOTSUPP;
-
-	return ethtool_op_set_flags(net_dev, data);
+	return ethtool_op_set_flags(net_dev, data, supported);
 }
 
 static void efx_ethtool_self_test(struct net_device *net_dev,
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 7985165e84fc..c762c6ac055b 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -4188,17 +4188,13 @@ static int sky2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom
 static int sky2_set_flags(struct net_device *dev, u32 data)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
+	u32 supported =
+		(sky2->hw->flags & SKY2_HW_RSS_BROKEN) ? 0 : ETH_FLAG_RXHASH;
+	int rc;
 
-	if (data & ~ETH_FLAG_RXHASH)
-		return -EOPNOTSUPP;
-
-	if (data & ETH_FLAG_RXHASH) {
-		if (sky2->hw->flags & SKY2_HW_RSS_BROKEN)
-			return -EINVAL;
-
-		dev->features |= NETIF_F_RXHASH;
-	} else
-		dev->features &= ~NETIF_F_RXHASH;
+	rc = ethtool_op_set_flags(dev, data, supported);
+	if (rc)
+		return rc;
 
 	rx_set_rss(dev);
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2c8af093d8b3..084ddb3c8032 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -457,7 +457,7 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data);
 u32 ethtool_op_get_ufo(struct net_device *dev);
 int ethtool_op_set_ufo(struct net_device *dev, u32 data);
 u32 ethtool_op_get_flags(struct net_device *dev);
-int ethtool_op_set_flags(struct net_device *dev, u32 data);
+int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported);
 void ethtool_ntuple_flush(struct net_device *dev);
 
 /**
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a0f4964033d2..5d42fae520d9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -144,31 +144,13 @@ u32 ethtool_op_get_flags(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_flags);
 
-int ethtool_op_set_flags(struct net_device *dev, u32 data)
+int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
 {
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-	unsigned long features = dev->features;
-
-	if (data & ETH_FLAG_LRO)
-		features |= NETIF_F_LRO;
-	else
-		features &= ~NETIF_F_LRO;
-
-	if (data & ETH_FLAG_NTUPLE) {
-		if (!ops->set_rx_ntuple)
-			return -EOPNOTSUPP;
-		features |= NETIF_F_NTUPLE;
-	} else {
-		/* safe to clear regardless */
-		features &= ~NETIF_F_NTUPLE;
-	}
-
-	if (data & ETH_FLAG_RXHASH)
-		features |= NETIF_F_RXHASH;
-	else
-		features &= ~NETIF_F_RXHASH;
+	if (data & ~supported)
+		return -EINVAL;
 
-	dev->features = features;
+	dev->features = ((dev->features & ~flags_dup_features) |
+			 (data & flags_dup_features));
 	return 0;
 }
 EXPORT_SYMBOL(ethtool_op_set_flags);
-- 
cgit v1.2.3-58-ga151


From a5b6ee291e39e285e021cf251dbcf770c83cd74e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Wed, 30 Jun 2010 05:05:23 +0000
Subject: ethtool: Add support for control of RX flow hash indirection

Many NICs use an indirection table to map an RX flow hash value to one
of an arbitrary number of queues (not necessarily a power of 2).  It
can be useful to remove some queues from this indirection table so
that they are only used for flows that are specifically filtered
there.  It may also be useful to weight the mapping to account for
user processes with the same CPU-affinity as the RX interrupts.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 15 ++++++++++
 net/core/ethtool.c      | 80 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 084ddb3c8032..c1be61f3938b 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -384,6 +384,15 @@ struct ethtool_rxnfc {
 	__u32				rule_locs[0];
 };
 
+struct ethtool_rxfh_indir {
+	__u32	cmd;
+	/* On entry, this is the array size of the user buffer.  On
+	 * return from ETHTOOL_GRXFHINDIR, this is the array size of
+	 * the hardware indirection table. */
+	__u32	size;
+	__u32	ring_index[0];	/* ring/queue index for each hash value */
+};
+
 struct ethtool_rx_ntuple_flow_spec {
 	__u32		 flow_type;
 	union {
@@ -576,6 +585,10 @@ struct ethtool_ops {
 	int	(*set_rx_ntuple)(struct net_device *,
 				 struct ethtool_rx_ntuple *);
 	int	(*get_rx_ntuple)(struct net_device *, u32 stringset, void *);
+	int	(*get_rxfh_indir)(struct net_device *,
+				  struct ethtool_rxfh_indir *);
+	int	(*set_rxfh_indir)(struct net_device *,
+				  const struct ethtool_rxfh_indir *);
 };
 #endif /* __KERNEL__ */
 
@@ -637,6 +650,8 @@ struct ethtool_ops {
 #define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
 #define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
 #define ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
+#define ETHTOOL_GRXFHINDIR	0x00000038 /* Get RX flow hash indir'n table */
+#define ETHTOOL_SRXFHINDIR	0x00000039 /* Set RX flow hash indir'n table */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 5d42fae520d9..072d1d3796cb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -358,6 +358,80 @@ err_out:
 	return ret;
 }
 
+static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	struct ethtool_rxfh_indir *indir;
+	u32 table_size;
+	size_t full_size;
+	int ret;
+
+	if (!dev->ethtool_ops->get_rxfh_indir)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&table_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(table_size)))
+		return -EFAULT;
+
+	if (table_size >
+	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+		return -ENOMEM;
+	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+	indir = kmalloc(full_size, GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	indir->cmd = ETHTOOL_GRXFHINDIR;
+	indir->size = table_size;
+	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+	if (ret)
+		goto out;
+
+	if (copy_to_user(useraddr, indir, full_size))
+		ret = -EFAULT;
+
+out:
+	kfree(indir);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
+						     void __user *useraddr)
+{
+	struct ethtool_rxfh_indir *indir;
+	u32 table_size;
+	size_t full_size;
+	int ret;
+
+	if (!dev->ethtool_ops->set_rxfh_indir)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&table_size,
+			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			   sizeof(table_size)))
+		return -EFAULT;
+
+	if (table_size >
+	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
+		return -ENOMEM;
+	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
+	indir = kmalloc(full_size, GFP_USER);
+	if (!indir)
+		return -ENOMEM;
+
+	if (copy_from_user(indir, useraddr, full_size)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
+
+out:
+	kfree(indir);
+	return ret;
+}
+
 static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 			struct ethtool_rx_ntuple_flow_spec *spec,
 			struct ethtool_rx_ntuple_flow_spec_container *fsc)
@@ -1526,6 +1600,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GSSET_INFO:
 		rc = ethtool_get_sset_info(dev, useraddr);
 		break;
+	case ETHTOOL_GRXFHINDIR:
+		rc = ethtool_get_rxfh_indir(dev, useraddr);
+		break;
+	case ETHTOOL_SRXFHINDIR:
+		rc = ethtool_set_rxfh_indir(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3-58-ga151


From 4df53d8bab65cf2c18daebd51a5a4847e03f1943 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 2 Jul 2010 09:32:57 +0200
Subject: bridge: add per bridge device controls for invoking iptables

Support more fine grained control of bridge netfilter iptables invocation
by adding seperate brnf_call_*tables parameters for each device using the
sysfs interface. Packets are passed to layer 3 netfilter when either the
global parameter or the per bridge parameter is enabled.

Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/br_netfilter.c | 31 ++++++++++++++------
 net/bridge/br_private.h   |  3 ++
 net/bridge/br_sysfs_br.c  | 72 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6bb6f7c9e6e1..f1d49ae23411 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -55,6 +55,9 @@ static int brnf_call_arptables __read_mostly = 1;
 static int brnf_filter_vlan_tagged __read_mostly = 0;
 static int brnf_filter_pppoe_tagged __read_mostly = 0;
 #else
+#define brnf_call_iptables 1
+#define brnf_call_ip6tables 1
+#define brnf_call_arptables 1
 #define brnf_filter_vlan_tagged 0
 #define brnf_filter_pppoe_tagged 0
 #endif
@@ -543,25 +546,30 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
+	struct net_bridge_port *p;
+	struct net_bridge *br;
 	struct iphdr *iph;
 	__u32 len = nf_bridge_encap_header_len(skb);
 
 	if (unlikely(!pskb_may_pull(skb, len)))
 		goto out;
 
+	p = rcu_dereference(in->br_port);
+	if (p == NULL)
+		goto out;
+	br = p->br;
+
 	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
 	    IS_PPPOE_IPV6(skb)) {
-#ifdef CONFIG_SYSCTL
-		if (!brnf_call_ip6tables)
+		if (!brnf_call_ip6tables && !br->nf_call_ip6tables)
 			return NF_ACCEPT;
-#endif
+
 		nf_bridge_pull_encap_header_rcsum(skb);
 		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
 	}
-#ifdef CONFIG_SYSCTL
-	if (!brnf_call_iptables)
+
+	if (!brnf_call_iptables && !br->nf_call_iptables)
 		return NF_ACCEPT;
-#endif
 
 	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
 	    !IS_PPPOE_IP(skb))
@@ -714,12 +722,17 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 				      const struct net_device *out,
 				      int (*okfn)(struct sk_buff *))
 {
+	struct net_bridge_port *p;
+	struct net_bridge *br;
 	struct net_device **d = (struct net_device **)(skb->cb);
 
-#ifdef CONFIG_SYSCTL
-	if (!brnf_call_arptables)
+	p = rcu_dereference(out->br_port);
+	if (p == NULL)
+		return NF_ACCEPT;
+	br = p->br;
+
+	if (!brnf_call_arptables && !br->nf_call_arptables)
 		return NF_ACCEPT;
-#endif
 
 	if (skb->protocol != htons(ETH_P_ARP)) {
 		if (!IS_VLAN_ARP(skb))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c83519b555bb..7484065da303 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -164,6 +164,9 @@ struct net_bridge
 	unsigned long			feature_mask;
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct rtable 			fake_rtable;
+	bool				nf_call_iptables;
+	bool				nf_call_ip6tables;
+	bool				nf_call_arptables;
 #endif
 	unsigned long			flags;
 #define BR_SET_MAC_ADDR		0x00000001
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 486b8f3861d2..5c1e5559ebba 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -611,6 +611,73 @@ static DEVICE_ATTR(multicast_startup_query_interval, S_IRUGO | S_IWUSR,
 		   show_multicast_startup_query_interval,
 		   store_multicast_startup_query_interval);
 #endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+static ssize_t show_nf_call_iptables(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->nf_call_iptables);
+}
+
+static int set_nf_call_iptables(struct net_bridge *br, unsigned long val)
+{
+	br->nf_call_iptables = val ? true : false;
+	return 0;
+}
+
+static ssize_t store_nf_call_iptables(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_nf_call_iptables);
+}
+static DEVICE_ATTR(nf_call_iptables, S_IRUGO | S_IWUSR,
+		   show_nf_call_iptables, store_nf_call_iptables);
+
+static ssize_t show_nf_call_ip6tables(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->nf_call_ip6tables);
+}
+
+static int set_nf_call_ip6tables(struct net_bridge *br, unsigned long val)
+{
+	br->nf_call_ip6tables = val ? true : false;
+	return 0;
+}
+
+static ssize_t store_nf_call_ip6tables(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_nf_call_ip6tables);
+}
+static DEVICE_ATTR(nf_call_ip6tables, S_IRUGO | S_IWUSR,
+		   show_nf_call_ip6tables, store_nf_call_ip6tables);
+
+static ssize_t show_nf_call_arptables(
+	struct device *d, struct device_attribute *attr, char *buf)
+{
+	struct net_bridge *br = to_bridge(d);
+	return sprintf(buf, "%u\n", br->nf_call_arptables);
+}
+
+static int set_nf_call_arptables(struct net_bridge *br, unsigned long val)
+{
+	br->nf_call_arptables = val ? true : false;
+	return 0;
+}
+
+static ssize_t store_nf_call_arptables(
+	struct device *d, struct device_attribute *attr, const char *buf,
+	size_t len)
+{
+	return store_bridge_parm(d, buf, len, set_nf_call_arptables);
+}
+static DEVICE_ATTR(nf_call_arptables, S_IRUGO | S_IWUSR,
+		   show_nf_call_arptables, store_nf_call_arptables);
+#endif
 
 static struct attribute *bridge_attrs[] = {
 	&dev_attr_forward_delay.attr,
@@ -644,6 +711,11 @@ static struct attribute *bridge_attrs[] = {
 	&dev_attr_multicast_query_interval.attr,
 	&dev_attr_multicast_query_response_interval.attr,
 	&dev_attr_multicast_startup_query_interval.attr,
+#endif
+#ifdef CONFIG_BRIDGE_NETFILTER
+	&dev_attr_nf_call_iptables.attr,
+	&dev_attr_nf_call_ip6tables.attr,
+	&dev_attr_nf_call_arptables.attr,
 #endif
 	NULL
 };
-- 
cgit v1.2.3-58-ga151


From 499031ac8a3df6738f6186ded9da853e8ea18253 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 2 Jul 2010 10:05:01 +0200
Subject: netfilter: ip6t_REJECT: fix a dst leak in ipv6 REJECT

We should release dst if dst->error is set.

Bug introduced in 2.6.14 by commit e104411b82f5c
([XFRM]: Always release dst_entry on error in xfrm_lookup)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6t_REJECT.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 47d227713758..2933396e0281 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -97,9 +97,11 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	fl.fl_ip_dport = otcph.source;
 	security_skb_classify_flow(oldskb, &fl);
 	dst = ip6_route_output(net, NULL, &fl);
-	if (dst == NULL)
+	if (dst == NULL || dst->error) {
+		dst_release(dst);
 		return;
-	if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0))
+	}
+	if (xfrm_lookup(net, &dst, &fl, NULL, 0))
 		return;
 
 	hh_len = (dst->dev->hard_header_len + 15)&~15;
-- 
cgit v1.2.3-58-ga151


From f0796d5c73e59786d09a1e617689d1d415f2db44 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Thu, 1 Jul 2010 13:21:57 +0000
Subject: net: decreasing real_num_tx_queues needs to flush qdisc

Reducing real_num_queues needs to flush the qdisc otherwise
skbs with queue_mappings greater then real_num_tx_queues can
be sent to the underlying driver.

The flow for this is,

dev_queue_xmit()
	dev_pick_tx()
		skb_tx_hash()  => hash using real_num_tx_queues
		skb_set_queue_mapping()
	...
	qdisc_enqueue_root() => enqueue skb on txq from hash
...
dev->real_num_tx_queues -= n
...
sch_direct_xmit()
	dev_hard_start_xmit()
		ndo_start_xmit(skb,dev) => skb queue set with old hash

skbs are enqueued on the qdisc with skb->queue_mapping set
0 < queue_mappings < real_num_tx_queues.  When the driver
decreases real_num_tx_queues skb's may be dequeued from the
qdisc with a queue_mapping greater then real_num_tx_queues.

This fixes a case in ixgbe where this was occurring with DCB
and FCoE. Because the driver is using queue_mapping to map
skbs to tx descriptor rings we can potentially map skbs to
rings that no longer exist.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ixgbe/ixgbe_main.c |  2 +-
 include/linux/netdevice.h      |  3 +++
 include/net/sch_generic.h      | 12 ++++++++----
 net/core/dev.c                 | 18 ++++++++++++++++++
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c
index a0b33165b989..7b5d9764f317 100644
--- a/drivers/net/ixgbe/ixgbe_main.c
+++ b/drivers/net/ixgbe/ixgbe_main.c
@@ -4001,7 +4001,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 
 done:
 	/* Notify the stack of the (possibly) reduced Tx Queue count. */
-	adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
+	netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
 }
 
 static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40291f375024..5e6188d9f017 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1656,6 +1656,9 @@ static inline int netif_is_multiqueue(const struct net_device *dev)
 	return (dev->num_tx_queues > 1);
 }
 
+extern void netif_set_real_num_tx_queues(struct net_device *dev,
+					 unsigned int txq);
+
 /* Use this variant when it is known for sure that it
  * is executing from hardware interrupt context or with hardware interrupts
  * disabled.
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ba749be1e354..433604bb3fe8 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -313,13 +313,12 @@ extern void qdisc_calculate_pkt_len(struct sk_buff *skb,
 extern void tcf_destroy(struct tcf_proto *tp);
 extern void tcf_destroy_chain(struct tcf_proto **fl);
 
-/* Reset all TX qdiscs of a device.  */
-static inline void qdisc_reset_all_tx(struct net_device *dev)
+/* Reset all TX qdiscs greater then index of a device.  */
+static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 {
-	unsigned int i;
 	struct Qdisc *qdisc;
 
-	for (i = 0; i < dev->num_tx_queues; i++) {
+	for (; i < dev->num_tx_queues; i++) {
 		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
 		if (qdisc) {
 			spin_lock_bh(qdisc_lock(qdisc));
@@ -329,6 +328,11 @@ static inline void qdisc_reset_all_tx(struct net_device *dev)
 	}
 }
 
+static inline void qdisc_reset_all_tx(struct net_device *dev)
+{
+	qdisc_reset_all_tx_gt(dev, 0);
+}
+
 /* Are all TX queues of the device empty?  */
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b3bf53bc687..723a34710ad4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1553,6 +1553,24 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/*
+ * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
+ * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
+ */
+void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+{
+	unsigned int real_num = dev->real_num_tx_queues;
+
+	if (unlikely(txq > dev->num_tx_queues))
+		;
+	else if (txq > real_num)
+		dev->real_num_tx_queues = txq;
+	else if (txq < real_num) {
+		dev->real_num_tx_queues = txq;
+		qdisc_reset_all_tx_gt(dev, txq);
+	}
+}
+EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
 static inline void __netif_reschedule(struct Qdisc *q)
 {
-- 
cgit v1.2.3-58-ga151


From 256df2f3879efdb2e9808bdb1b54b16fbb11fa38 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sun, 27 Jun 2010 01:02:35 +0000
Subject: netdevice.h net/core/dev.c: Convert netdev_<level> logging macros to
 functions

Reduces an x86 defconfig text and data ~2k.
text is smaller, data is larger.

$ size vmlinux*
   text	   data	    bss	    dec	    hex	filename
7198862	 720112	1366288	9285262	 8dae8e	vmlinux
7205273	 716016	1366288	9287577	 8db799	vmlinux.device_h

Uses %pV and struct va_format
Format arguments are verified before printk

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 36 +++++++++++++--------------
 net/core/dev.c            | 62 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8fa5e5aa879a..0183901ea475 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2250,25 +2250,23 @@ static inline const char *netdev_name(const struct net_device *dev)
 	return dev->name;
 }
 
-#define netdev_printk(level, netdev, format, args...)		\
-	dev_printk(level, (netdev)->dev.parent,			\
-		   "%s: " format,				\
-		   netdev_name(netdev), ##args)
-
-#define netdev_emerg(dev, format, args...)			\
-	netdev_printk(KERN_EMERG, dev, format, ##args)
-#define netdev_alert(dev, format, args...)			\
-	netdev_printk(KERN_ALERT, dev, format, ##args)
-#define netdev_crit(dev, format, args...)			\
-	netdev_printk(KERN_CRIT, dev, format, ##args)
-#define netdev_err(dev, format, args...)			\
-	netdev_printk(KERN_ERR, dev, format, ##args)
-#define netdev_warn(dev, format, args...)			\
-	netdev_printk(KERN_WARNING, dev, format, ##args)
-#define netdev_notice(dev, format, args...)			\
-	netdev_printk(KERN_NOTICE, dev, format, ##args)
-#define netdev_info(dev, format, args...)			\
-	netdev_printk(KERN_INFO, dev, format, ##args)
+extern int netdev_printk(const char *level, const struct net_device *dev,
+			 const char *format, ...)
+	__attribute__ ((format (printf, 3, 4)));
+extern int netdev_emerg(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_alert(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_crit(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_err(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_warn(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_notice(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
+extern int netdev_info(const struct net_device *dev, const char *format, ...)
+	__attribute__ ((format (printf, 2, 3)));
 
 #if defined(DEBUG)
 #define netdev_dbg(__dev, format, args...)			\
diff --git a/net/core/dev.c b/net/core/dev.c
index e85cc5fa3c4e..93b8929fa21d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5802,6 +5802,68 @@ char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
 	return buffer;
 }
 
+static int __netdev_printk(const char *level, const struct net_device *dev,
+			   struct va_format *vaf)
+{
+	int r;
+
+	if (dev && dev->dev.parent)
+		r = dev_printk(level, dev->dev.parent, "%s: %pV",
+			       netdev_name(dev), vaf);
+	else if (dev)
+		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
+	else
+		r = printk("%s(NULL net_device): %pV", level, vaf);
+
+	return r;
+}
+
+int netdev_printk(const char *level, const struct net_device *dev,
+		  const char *format, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, format);
+
+	vaf.fmt = format;
+	vaf.va = &args;
+
+	r = __netdev_printk(level, dev, &vaf);
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(netdev_printk);
+
+#define define_netdev_printk_level(func, level)			\
+int func(const struct net_device *dev, const char *fmt, ...)	\
+{								\
+	int r;							\
+	struct va_format vaf;					\
+	va_list args;						\
+								\
+	va_start(args, fmt);					\
+								\
+	vaf.fmt = fmt;						\
+	vaf.va = &args;						\
+								\
+	r = __netdev_printk(level, dev, &vaf);			\
+	va_end(args);						\
+								\
+	return r;						\
+}								\
+EXPORT_SYMBOL(func);
+
+define_netdev_printk_level(netdev_emerg, KERN_EMERG);
+define_netdev_printk_level(netdev_alert, KERN_ALERT);
+define_netdev_printk_level(netdev_crit, KERN_CRIT);
+define_netdev_printk_level(netdev_err, KERN_ERR);
+define_netdev_printk_level(netdev_warn, KERN_WARNING);
+define_netdev_printk_level(netdev_notice, KERN_NOTICE);
+define_netdev_printk_level(netdev_info, KERN_INFO);
+
 static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
-- 
cgit v1.2.3-58-ga151


From 44b451f1633896de15d2d52e1a2bd462e80b7814 Mon Sep 17 00:00:00 2001
From: Peter Kosyh <p.kosyh@gmail.com>
Date: Fri, 2 Jul 2010 07:47:55 +0000
Subject: xfrm: fix xfrm by MARK logic

While using xfrm by MARK feature in
2.6.34 - 2.6.35 kernels, the mark
is always cleared in flowi structure via memset in
_decode_session4 (net/ipv4/xfrm4_policy.c), so
the policy lookup fails.
IPv6 code is affected by this bug too.

Signed-off-by: Peter Kosyh <p.kosyh@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/xfrm4_policy.c | 2 ++
 net/ipv6/xfrm6_policy.c | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1705476670ef..23883a48ebfb 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -108,6 +108,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 
 	memset(fl, 0, sizeof(struct flowi));
+	fl->mark = skb->mark;
+
 	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
 		switch (iph->protocol) {
 		case IPPROTO_UDP:
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 4a0e77e14468..6baeabbbca82 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -124,6 +124,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	u8 nexthdr = nh[IP6CB(skb)->nhoff];
 
 	memset(fl, 0, sizeof(struct flowi));
+	fl->mark = skb->mark;
+
 	ipv6_addr_copy(&fl->fl6_dst, reverse ? &hdr->saddr : &hdr->daddr);
 	ipv6_addr_copy(&fl->fl6_src, reverse ? &hdr->daddr : &hdr->saddr);
 
-- 
cgit v1.2.3-58-ga151


From fe76cda3081b502986f0c8b28b0cf8bfc27d44d5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 1 Jul 2010 23:48:22 +0000
Subject: ipv4: use skb_dst_copy() in ip_copy_metadata()

Avoid touching dst refcount in ip_fragment().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7d1f4b4481a9..d6478521128e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -411,7 +411,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 	to->priority = from->priority;
 	to->protocol = from->protocol;
 	skb_dst_drop(to);
-	skb_dst_set(to, dst_clone(skb_dst(from)));
+	skb_dst_copy(to, from);
 	to->dev = from->dev;
 	to->mark = from->mark;
 
-- 
cgit v1.2.3-58-ga151


From 7f285fa78d4b81b8458f05e77fb6b46245121b4e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 5 Jul 2010 14:50:08 +0000
Subject: bridge br_multicast: BUG: unable to handle kernel NULL pointer
 dereference

On Tue, Jul 06, 2010 at 08:48:35AM +0800, Herbert Xu wrote:
>
> bridge: Restore NULL check in br_mdb_ip_get

Resend with proper attribution.

bridge: Restore NULL check in br_mdb_ip_get

Somewhere along the line the NULL check in br_mdb_ip_get went
AWOL, causing crashes when we receive an IGMP packet with no
multicast table allocated.

This patch restores it and ensures all br_mdb_*_get functions
use it.

Reported-by: Frank Arnold <frank.arnold@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Thanks,
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9d21d98ae5fa..27ae946363f1 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -99,6 +99,15 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get(
 	return NULL;
 }
 
+static struct net_bridge_mdb_entry *br_mdb_ip_get(
+	struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
+{
+	if (!mdb)
+		return NULL;
+
+	return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
+}
+
 static struct net_bridge_mdb_entry *br_mdb_ip4_get(
 	struct net_bridge_mdb_htable *mdb, __be32 dst)
 {
@@ -107,7 +116,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(
 	br_dst.u.ip4 = dst;
 	br_dst.proto = htons(ETH_P_IP);
 
-	return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst));
+	return br_mdb_ip_get(mdb, &br_dst);
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
@@ -119,23 +128,17 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 	ipv6_addr_copy(&br_dst.u.ip6, dst);
 	br_dst.proto = htons(ETH_P_IPV6);
 
-	return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst));
+	return br_mdb_ip_get(mdb, &br_dst);
 }
 #endif
 
-static struct net_bridge_mdb_entry *br_mdb_ip_get(
-	struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
-{
-	return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
-}
-
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					struct sk_buff *skb)
 {
 	struct net_bridge_mdb_htable *mdb = br->mdb;
 	struct br_ip ip;
 
-	if (!mdb || br->multicast_disabled)
+	if (br->multicast_disabled)
 		return NULL;
 
 	if (BR_INPUT_SKB_CB(skb)->igmp)
-- 
cgit v1.2.3-58-ga151


From 17762060c25590bfddd68cc1131f28ec720f405f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 5 Jul 2010 21:29:28 +0000
Subject: bridge: Clear IPCB before possible entry into IP stack

The bridge protocol lives dangerously by having incestuous relations
with the IP stack.  In this instance an abomination has been created
where a bogus IPCB area from a bridged packet leads to a crash in
the IP stack because it's interpreted as IP options.

This patch papers over the problem by clearing the IPCB area in that
particular spot.  To fix this properly we'd also need to parse any
IP options if present but I'm way too lazy for that.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_netfilter.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 44420992f72f..8fb75f89c4aa 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -591,6 +591,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 
 	pskb_trim_rcsum(skb, len);
 
+	/* BUG: Should really parse the IP options here. */
+	memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+
 	nf_bridge_put(skb->nf_bridge);
 	if (!nf_bridge_alloc(skb))
 		return NF_DROP;
-- 
cgit v1.2.3-58-ga151


From 28172739f0a276eb8d6ca917b3974c2edb036da3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Jul 2010 14:58:56 -0700
Subject: net: fix 64 bit counters on 32 bit arches

There is a small possibility that a reader gets incorrect values on 32
bit arches. SNMP applications could catch incorrect counters when a
32bit high part is changed by another stats consumer/provider.

One way to solve this is to add a rtnl_link_stats64 param to all
ndo_get_stats64() methods, and also add such a parameter to
dev_get_stats().

Rule is that we are not allowed to use dev->stats64 as a temporary
storage for 64bit stats, but a caller provided area (usually on stack)

Old drivers (only providing get_stats() method) need no changes.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/appldata/appldata_net_sum.c       |  3 +-
 drivers/net/bonding/bond_main.c             | 64 ++++++++++++++---------------
 drivers/net/ixgbe/ixgbe_ethtool.c           |  8 ++--
 drivers/net/loopback.c                      |  4 +-
 drivers/net/macvlan.c                       |  6 +--
 drivers/net/sfc/efx.c                       |  3 +-
 drivers/net/sfc/ethtool.c                   |  3 +-
 drivers/parisc/led.c                        |  3 +-
 drivers/scsi/fcoe/fcoe.c                    |  3 +-
 drivers/staging/batman-adv/hard-interface.c |  3 +-
 drivers/usb/gadget/rndis.c                  |  3 +-
 include/linux/netdevice.h                   | 12 ++++--
 net/8021q/vlan_dev.c                        |  6 +--
 net/8021q/vlanproc.c                        |  3 +-
 net/bridge/br_device.c                      |  4 +-
 net/core/dev.c                              | 25 +++++++----
 net/core/net-sysfs.c                        |  4 +-
 net/core/rtnetlink.c                        |  3 +-
 18 files changed, 89 insertions(+), 71 deletions(-)

(limited to 'net')

diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 9a9586f4103f..f02e89ce4df1 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -85,7 +85,8 @@ static void appldata_get_net_sum_data(void *data)
 
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
-		const struct net_device_stats *stats = dev_get_stats(dev);
+		struct rtnl_link_stats64 temp;
+		const struct net_device_stats *stats = dev_get_stats(dev, &temp);
 
 		rx_packets += stats->rx_packets;
 		tx_packets += stats->tx_packets;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index a95a41b74b4e..9bb9bfa225b6 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3804,51 +3804,49 @@ static int bond_close(struct net_device *bond_dev)
 	return 0;
 }
 
-static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev)
+static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
+						struct rtnl_link_stats64 *stats)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct rtnl_link_stats64 *stats = &bond_dev->stats64;
-	struct rtnl_link_stats64 local_stats;
+	struct rtnl_link_stats64 temp;
 	struct slave *slave;
 	int i;
 
-	memset(&local_stats, 0, sizeof(local_stats));
+	memset(stats, 0, sizeof(*stats));
 
 	read_lock_bh(&bond->lock);
 
 	bond_for_each_slave(bond, slave, i) {
 		const struct rtnl_link_stats64 *sstats =
-			dev_get_stats(slave->dev);
-
-		local_stats.rx_packets += sstats->rx_packets;
-		local_stats.rx_bytes += sstats->rx_bytes;
-		local_stats.rx_errors += sstats->rx_errors;
-		local_stats.rx_dropped += sstats->rx_dropped;
-
-		local_stats.tx_packets += sstats->tx_packets;
-		local_stats.tx_bytes += sstats->tx_bytes;
-		local_stats.tx_errors += sstats->tx_errors;
-		local_stats.tx_dropped += sstats->tx_dropped;
-
-		local_stats.multicast += sstats->multicast;
-		local_stats.collisions += sstats->collisions;
-
-		local_stats.rx_length_errors += sstats->rx_length_errors;
-		local_stats.rx_over_errors += sstats->rx_over_errors;
-		local_stats.rx_crc_errors += sstats->rx_crc_errors;
-		local_stats.rx_frame_errors += sstats->rx_frame_errors;
-		local_stats.rx_fifo_errors += sstats->rx_fifo_errors;
-		local_stats.rx_missed_errors += sstats->rx_missed_errors;
-
-		local_stats.tx_aborted_errors += sstats->tx_aborted_errors;
-		local_stats.tx_carrier_errors += sstats->tx_carrier_errors;
-		local_stats.tx_fifo_errors += sstats->tx_fifo_errors;
-		local_stats.tx_heartbeat_errors += sstats->tx_heartbeat_errors;
-		local_stats.tx_window_errors += sstats->tx_window_errors;
+			dev_get_stats(slave->dev, &temp);
+
+		stats->rx_packets += sstats->rx_packets;
+		stats->rx_bytes += sstats->rx_bytes;
+		stats->rx_errors += sstats->rx_errors;
+		stats->rx_dropped += sstats->rx_dropped;
+
+		stats->tx_packets += sstats->tx_packets;
+		stats->tx_bytes += sstats->tx_bytes;
+		stats->tx_errors += sstats->tx_errors;
+		stats->tx_dropped += sstats->tx_dropped;
+
+		stats->multicast += sstats->multicast;
+		stats->collisions += sstats->collisions;
+
+		stats->rx_length_errors += sstats->rx_length_errors;
+		stats->rx_over_errors += sstats->rx_over_errors;
+		stats->rx_crc_errors += sstats->rx_crc_errors;
+		stats->rx_frame_errors += sstats->rx_frame_errors;
+		stats->rx_fifo_errors += sstats->rx_fifo_errors;
+		stats->rx_missed_errors += sstats->rx_missed_errors;
+
+		stats->tx_aborted_errors += sstats->tx_aborted_errors;
+		stats->tx_carrier_errors += sstats->tx_carrier_errors;
+		stats->tx_fifo_errors += sstats->tx_fifo_errors;
+		stats->tx_heartbeat_errors += sstats->tx_heartbeat_errors;
+		stats->tx_window_errors += sstats->tx_window_errors;
 	}
 
-	memcpy(stats, &local_stats, sizeof(struct net_device_stats));
-
 	read_unlock_bh(&bond->lock);
 
 	return stats;
diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c
index b35ef36741ef..da54b38bb480 100644
--- a/drivers/net/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ixgbe/ixgbe_ethtool.c
@@ -55,7 +55,7 @@ struct ixgbe_stats {
 				offsetof(struct ixgbe_adapter, m)
 #define IXGBE_NETDEV_STAT(m)	NETDEV_STATS, \
 				sizeof(((struct net_device *)0)->m), \
-				offsetof(struct net_device, m)
+				offsetof(struct net_device, m) - offsetof(struct net_device, stats)
 
 static struct ixgbe_stats ixgbe_gstrings_stats[] = {
 	{"rx_packets", IXGBE_NETDEV_STAT(stats.rx_packets)},
@@ -998,16 +998,18 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev,
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	u64 *queue_stat;
 	int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *net_stats;
 	int j, k;
 	int i;
 	char *p = NULL;
 
 	ixgbe_update_stats(adapter);
-	dev_get_stats(netdev);
+	net_stats = dev_get_stats(netdev, &temp);
 	for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) {
 		switch (ixgbe_gstrings_stats[i].type) {
 		case NETDEV_STATS:
-			p = (char *) netdev +
+			p = (char *) net_stats +
 					ixgbe_gstrings_stats[i].stat_offset;
 			break;
 		case IXGBE_STATS:
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4dd0510d7a99..9a0996795321 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -98,10 +98,10 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
-static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *loopback_get_stats64(struct net_device *dev,
+						      struct rtnl_link_stats64 *stats)
 {
 	const struct pcpu_lstats __percpu *pcpu_lstats;
-	struct rtnl_link_stats64 *stats = &dev->stats64;
 	u64 bytes = 0;
 	u64 packets = 0;
 	u64 drops = 0;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e6d626e78515..6112f1498940 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -431,12 +431,12 @@ static void macvlan_uninit(struct net_device *dev)
 	free_percpu(vlan->rx_stats);
 }
 
-static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
+							 struct rtnl_link_stats64 *stats)
 {
-	struct rtnl_link_stats64 *stats = &dev->stats64;
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, &dev->stats);
+	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
 
 	if (vlan->rx_stats) {
 		struct macvlan_rx_stats *p, accum = {0};
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 35b3f2922e5c..ba674c5ca29e 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1533,11 +1533,10 @@ static int efx_net_stop(struct net_device *net_dev)
 }
 
 /* Context: process, dev_base_lock or RTNL held, non-blocking. */
-static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev)
+static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 	struct efx_mac_stats *mac_stats = &efx->mac_stats;
-	struct rtnl_link_stats64 *stats = &net_dev->stats64;
 
 	spin_lock_bh(&efx->stats_lock);
 	efx->type->update_stats(efx);
diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c
index 3b8b0a062749..fd19d6ab97a2 100644
--- a/drivers/net/sfc/ethtool.c
+++ b/drivers/net/sfc/ethtool.c
@@ -469,12 +469,13 @@ static void efx_ethtool_get_stats(struct net_device *net_dev,
 	struct efx_mac_stats *mac_stats = &efx->mac_stats;
 	struct efx_ethtool_stat *stat;
 	struct efx_channel *channel;
+	struct rtnl_link_stats64 temp;
 	int i;
 
 	EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS);
 
 	/* Update MAC and NIC statistics */
-	dev_get_stats(net_dev);
+	dev_get_stats(net_dev, &temp);
 
 	/* Fill detailed statistics buffer */
 	for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) {
diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index 188bc8496a26..18dff43b8bd2 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -355,12 +355,13 @@ static __inline__ int led_get_net_activity(void)
 	rcu_read_lock();
 	for_each_netdev_rcu(&init_net, dev) {
 	    const struct net_device_stats *stats;
+	    struct rtnl_link_stats64 temp;
 	    struct in_device *in_dev = __in_dev_get_rcu(dev);
 	    if (!in_dev || !in_dev->ifa_list)
 		continue;
 	    if (ipv4_is_loopback(in_dev->ifa_list->ifa_local))
 		continue;
-	    stats = dev_get_stats(dev);
+	    stats = dev_get_stats(dev, &temp);
 	    rx_total += stats->rx_packets;
 	    tx_total += stats->tx_packets;
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 44a07593de56..1a429ed6da9d 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -2653,6 +2653,7 @@ static void fcoe_get_lesb(struct fc_lport *lport,
 	u32 lfc, vlfc, mdac;
 	struct fcoe_dev_stats *devst;
 	struct fcoe_fc_els_lesb *lesb;
+	struct rtnl_link_stats64 temp;
 	struct net_device *netdev = fcoe_netdev(lport);
 
 	lfc = 0;
@@ -2669,7 +2670,7 @@ static void fcoe_get_lesb(struct fc_lport *lport,
 	lesb->lesb_link_fail = htonl(lfc);
 	lesb->lesb_vlink_fail = htonl(vlfc);
 	lesb->lesb_miss_fka = htonl(mdac);
-	lesb->lesb_fcs_error = htonl(dev_get_stats(netdev)->rx_crc_errors);
+	lesb->lesb_fcs_error = htonl(dev_get_stats(netdev, &temp)->rx_crc_errors);
 }
 
 /**
diff --git a/drivers/staging/batman-adv/hard-interface.c b/drivers/staging/batman-adv/hard-interface.c
index 5ede9c255094..96c86c873011 100644
--- a/drivers/staging/batman-adv/hard-interface.c
+++ b/drivers/staging/batman-adv/hard-interface.c
@@ -440,6 +440,7 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	struct batman_packet *batman_packet;
 	struct batman_if *batman_if;
 	struct net_device_stats *stats;
+	struct rtnl_link_stats64 temp;
 	int ret;
 
 	skb = skb_share_check(skb, GFP_ATOMIC);
@@ -468,7 +469,7 @@ int batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	if (batman_if->if_status != IF_ACTIVE)
 		goto err_free;
 
-	stats = (struct net_device_stats *)dev_get_stats(skb->dev);
+	stats = (struct net_device_stats *)dev_get_stats(skb->dev, &temp);
 	if (stats) {
 		stats->rx_packets++;
 		stats->rx_bytes += skb->len;
diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
index fb69b01c8f3a..020fa5a25fda 100644
--- a/drivers/usb/gadget/rndis.c
+++ b/drivers/usb/gadget/rndis.c
@@ -171,6 +171,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	int			i, count;
 	rndis_query_cmplt_type	*resp;
 	struct net_device	*net;
+	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 
 	if (!r) return -ENOMEM;
@@ -194,7 +195,7 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
 	resp->InformationBufferOffset = cpu_to_le32 (16);
 
 	net = rndis_per_dev_params[configNr].dev;
-	stats = dev_get_stats(net);
+	stats = dev_get_stats(net, &temp);
 
 	switch (OID) {
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d27368674db..60de65316fdb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -666,7 +666,8 @@ struct netdev_rx_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
+ * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev
+ *                      struct rtnl_link_stats64 *storage);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. Drivers must do one of the following:
@@ -733,7 +734,8 @@ struct net_device_ops {
 						   struct neigh_parms *);
 	void			(*ndo_tx_timeout) (struct net_device *dev);
 
-	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev);
+	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
+						     struct rtnl_link_stats64 *storage);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
 	void			(*ndo_vlan_rx_register)(struct net_device *dev,
@@ -2139,8 +2141,10 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
-extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev);
-extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
+extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+						     struct rtnl_link_stats64 *storage);
+extern void		dev_txq_stats_fold(const struct net_device *dev,
+					   struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c6456cb842fa..7865a4ce5250 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -803,11 +803,9 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 	return dev_ethtool_get_flags(vlan->real_dev);
 }
 
-static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	struct rtnl_link_stats64 *stats = &dev->stats64;
-
-	dev_txq_stats_fold(dev, &dev->stats);
+	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
 
 	if (vlan_dev_info(dev)->vlan_rx_stats) {
 		struct vlan_rx_stats *p, accum = {0};
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index df56f5ce887c..80e280f56686 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -278,6 +278,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
 	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	static const char fmt[] = "%30s %12lu\n";
 	static const char fmt64[] = "%30s %12llu\n";
@@ -286,7 +287,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	if (!is_vlan_dev(vlandev))
 		return 0;
 
-	stats = dev_get_stats(vlandev);
+	stats = dev_get_stats(vlandev, &temp);
 	seq_printf(seq,
 		   "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
 		   vlandev->name, dev_info->vlan_id,
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index edf639e96281..075c435ad22d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -98,10 +98,10 @@ static int br_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev)
+static struct rtnl_link_stats64 *br_get_stats64(struct net_device *dev,
+						struct rtnl_link_stats64 *stats)
 {
 	struct net_bridge *br = netdev_priv(dev);
-	struct rtnl_link_stats64 *stats = &dev->stats64;
 	struct br_cpu_netstats tmp, sum = { 0 };
 	unsigned int cpu;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 93b8929fa21d..92482d7a87a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3703,7 +3703,8 @@ void dev_seq_stop(struct seq_file *seq, void *v)
 
 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
 {
-	const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
+	struct rtnl_link_stats64 temp;
+	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
 
 	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
 		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
@@ -5281,23 +5282,29 @@ EXPORT_SYMBOL(dev_txq_stats_fold);
 /**
  *	dev_get_stats	- get network device statistics
  *	@dev: device to get statistics from
+ *	@storage: place to store stats
  *
  *	Get network statistics from device. The device driver may provide
  *	its own method by setting dev->netdev_ops->get_stats64 or
  *	dev->netdev_ops->get_stats; otherwise the internal statistics
  *	structure is used.
  */
-const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev)
+const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					      struct rtnl_link_stats64 *storage)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-	if (ops->ndo_get_stats64)
-		return ops->ndo_get_stats64(dev);
-	if (ops->ndo_get_stats)
-		return (struct rtnl_link_stats64 *)ops->ndo_get_stats(dev);
-
-	dev_txq_stats_fold(dev, &dev->stats);
-	return &dev->stats64;
+	if (ops->ndo_get_stats64) {
+		memset(storage, 0, sizeof(*storage));
+		return ops->ndo_get_stats64(dev, storage);
+	}
+	if (ops->ndo_get_stats) {
+		memcpy(storage, ops->ndo_get_stats(dev), sizeof(*storage));
+		return storage;
+	}
+	memcpy(storage, &dev->stats, sizeof(*storage));
+	dev_txq_stats_fold(dev, (struct net_device_stats *)storage);
+	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ea3bb4c3b87d..914f42b0f039 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -330,7 +330,9 @@ static ssize_t netstat_show(const struct device *d,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(dev)) {
-		const struct rtnl_link_stats64 *stats = dev_get_stats(dev);
+		struct rtnl_link_stats64 temp;
+		const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
+
 		ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset));
 	}
 	read_unlock(&dev_base_lock);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e645778e9b7e..5e773ea2201d 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -791,6 +791,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 {
 	struct ifinfomsg *ifm;
 	struct nlmsghdr *nlh;
+	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	struct nlattr *attr;
 
@@ -847,7 +848,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (attr == NULL)
 		goto nla_put_failure;
 
-	stats = dev_get_stats(dev);
+	stats = dev_get_stats(dev, &temp);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
 	attr = nla_reserve(skb, IFLA_STATS64,
-- 
cgit v1.2.3-58-ga151


From 49085bd7d498c47d635851cfda22627b085cd9af Mon Sep 17 00:00:00 2001
From: George Kadianakis <desnacked@gmail.com>
Date: Tue, 6 Jul 2010 11:44:12 +0000
Subject: net/ipv4/ip_output.c: Removal of unused variable in ip_fragment()

Removal of unused integer variable in ip_fragment().

Signed-off-by: George Kadianakis <desnacked@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d6478521128e..663cb2acb39e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -442,7 +442,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
 	struct iphdr *iph;
-	int raw = 0;
 	int ptr;
 	struct net_device *dev;
 	struct sk_buff *skb2;
@@ -580,7 +579,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 slow_path:
 	left = skb->len - hlen;		/* Space per frame */
-	ptr = raw + hlen;		/* Where to start from */
+	ptr = hlen;		/* Where to start from */
 
 	/* for bridged IP traffic encapsulated inside f.e. a vlan header,
 	 * we need to make room for the encapsulating header
-- 
cgit v1.2.3-58-ga151


From e501d0553a7580fcc6654d7f58a5f061d31d00af Mon Sep 17 00:00:00 2001
From: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Date: Thu, 8 Jul 2010 12:14:41 +0300
Subject: Bluetooth: Check L2CAP pending status before sending connect request

Due to race condition in L2CAP state machine L2CAP Connection Request
may be sent twice for SDP with the same source channel id. Problems
reported connecting to Apple products, some carkit, Blackberry phones.

...
2010-06-07 21:18:03.651031 < ACL data: handle 1 flags 0x02 dlen 12
    L2CAP(s): Connect req: psm 1 scid 0x0040
2010-06-07 21:18:03.653473 > HCI Event: Number of Completed Packets (0x13) plen 5
    handle 1 packets 1
2010-06-07 21:18:03.653808 > HCI Event: Auth Complete (0x06) plen 3
    status 0x00 handle 1
2010-06-07 21:18:03.653869 < ACL data: handle 1 flags 0x02 dlen 12
    L2CAP(s): Connect req: psm 1 scid 0x0040
...

Patch uses L2CAP_CONF_CONNECT_PEND flag to mark that L2CAP Connection
Request has been sent already.

Modified version of patch from Ville Tervo.

Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 1b682a5aa061..cf3c4073a8a6 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -401,6 +401,11 @@ static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
 	l2cap_send_sframe(pi, control);
 }
 
+static inline int __l2cap_no_conn_pending(struct sock *sk)
+{
+	return !(l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND);
+}
+
 static void l2cap_do_start(struct sock *sk)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
@@ -409,12 +414,13 @@ static void l2cap_do_start(struct sock *sk)
 		if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
 			return;
 
-		if (l2cap_check_security(sk)) {
+		if (l2cap_check_security(sk) && __l2cap_no_conn_pending(sk)) {
 			struct l2cap_conn_req req;
 			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm  = l2cap_pi(sk)->psm;
 
 			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+			l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
 
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
@@ -464,12 +470,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 		}
 
 		if (sk->sk_state == BT_CONNECT) {
-			if (l2cap_check_security(sk)) {
+			if (l2cap_check_security(sk) &&
+					__l2cap_no_conn_pending(sk)) {
 				struct l2cap_conn_req req;
 				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 				req.psm  = l2cap_pi(sk)->psm;
 
 				l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+				l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
 
 				l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
@@ -2912,7 +2920,6 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 		l2cap_pi(sk)->ident = 0;
 		l2cap_pi(sk)->dcid = dcid;
 		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
-
 		l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
 
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
@@ -4404,6 +4411,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
 				req.psm  = l2cap_pi(sk)->psm;
 
 				l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+				l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
 
 				l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_REQ, sizeof(req), &req);
-- 
cgit v1.2.3-58-ga151


From da213f41cdd445d3df468f8fe7f24fe4f4c48508 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Fri, 18 Jun 2010 11:08:56 +0300
Subject: Bluetooth: Reset the security level after an authentication failure

When authentication fails for a connection the assumed security level
should be set back to BT_SECURITY_LOW so that subsequent connect
attempts over the same link don't falsely assume that security is
adequate enough.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 6c57fc71c7e2..786b5de0bac4 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1049,6 +1049,8 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 	if (conn) {
 		if (!ev->status)
 			conn->link_mode |= HCI_LM_AUTH;
+		else
+			conn->sec_level = BT_SECURITY_LOW;
 
 		clear_bit(HCI_CONN_AUTH_PEND, &conn->pend);
 
-- 
cgit v1.2.3-58-ga151


From 045309820afe047920a50de25634dab46a1e851d Mon Sep 17 00:00:00 2001
From: Ville Tervo <ville.tervo@nokia.com>
Date: Tue, 15 Jun 2010 15:56:05 +0300
Subject: Bluetooth: Update sec_level/auth_type for already existing
 connections

Update auth level for already existing connections if it is lower
than required by new connection.

Signed-off-by: Ville Tervo <ville.tervo@nokia.com>
Reviewed-by: Emeltchenko Andrei <andrei.emeltchenko@nokia.com>
Signed-off-by: Luciano Coelho <luciano.coelho@nokia.com>
Signed-off-by: Andrei Emeltchenko <andrei.emeltchenko@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index b10e3cdb08f8..800b6b9fbbae 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -358,6 +358,11 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 		acl->sec_level = sec_level;
 		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
+	} else {
+		if (acl->sec_level < sec_level)
+			acl->sec_level = sec_level;
+		if (acl->auth_type < auth_type)
+			acl->auth_type = auth_type;
 	}
 
 	if (type == ACL_LINK)
-- 
cgit v1.2.3-58-ga151


From dd4ba83dc1becbb3bb383851381c10c372e47247 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 8 Jul 2010 21:35:58 -0700
Subject: gre: propagate ipv6 transport class

This patch makes IPV6 over IPv4 GRE tunnel propagate the transport
class field from the underlying IPV6 header to the IPV4 Type Of Service
field. Without the patch, all IPV6 packets in tunnel look the same to QoS.

This assumes that IPV6 transport class is exactly the same
as IPv4 TOS. Not sure if that is always the case?  Maybe need
to mask off some bits.

The mask and shift to get tclass is copied from ipv6/datagram.c

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 749e54889e82..945b20a5ad50 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -731,6 +731,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 		tos = 0;
 		if (skb->protocol == htons(ETH_P_IP))
 			tos = old_iph->tos;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			tos = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
 	}
 
 	{
-- 
cgit v1.2.3-58-ga151


From 635f081541edef7644073f9be50ee5bf7c57ce63 Mon Sep 17 00:00:00 2001
From: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Date: Wed, 7 Jul 2010 20:56:53 +0000
Subject: Phonet: fix skb leak in pipe endpoint accept()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pep.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 94d72e85a475..b2a3ae6cad78 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -698,6 +698,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp)
 		newsk = NULL;
 		goto out;
 	}
+	kfree_skb(oskb);
 
 	sock_hold(sk);
 	pep_sk(newsk)->listener = sk;
-- 
cgit v1.2.3-58-ga151


From a204b48ed4dc31acf61090e530430ce3272b6aab Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Jul 2010 23:12:21 -0700
Subject: vlan: allow TSO setting on vlan interfaces

When we need to shape traffic using low speeds, we need to
disable tso on network interface :

ethtool -K eth0.2240 tso off

It seems vlan interfaces miss the set_tso() ethtool method.

Before enabling TSO, we must check real device supports
TSO for VLAN-tagged packets and enables TSO.

Note that a TSO change on real device propagates TSO setting
on all vlans, even if admin selected a different TSO setting.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_dev.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 7865a4ce5250..a1b8171cfa7b 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -836,12 +836,32 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
 	return stats;
 }
 
+static int vlan_ethtool_set_tso(struct net_device *dev, u32 data)
+{
+       if (data) {
+		struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+
+		/* Underlying device must support TSO for VLAN-tagged packets
+		 * and must have TSO enabled now.
+		 */
+		if (!(real_dev->vlan_features & NETIF_F_TSO))
+			return -EOPNOTSUPP;
+		if (!(real_dev->features & NETIF_F_TSO))
+			return -EINVAL;
+		dev->features |= NETIF_F_TSO;
+	} else {
+		dev->features &= ~NETIF_F_TSO;
+	}
+	return 0;
+}
+
 static const struct ethtool_ops vlan_ethtool_ops = {
 	.get_settings	        = vlan_ethtool_get_settings,
 	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
 	.get_rx_csum		= vlan_ethtool_get_rx_csum,
 	.get_flags		= vlan_ethtool_get_flags,
+	.set_tso                = vlan_ethtool_set_tso,
 };
 
 static const struct net_device_ops vlan_netdev_ops = {
-- 
cgit v1.2.3-58-ga151


From 7313bb8f3dd6e28bcf9c42adfd54a5cf9a4949e0 Mon Sep 17 00:00:00 2001
From: Karl Hiramoto <karl@hiramoto.org>
Date: Thu, 8 Jul 2010 20:55:30 +0000
Subject: atm: propagate signal changes via notifier

Add notifier chain for changes in atm_dev.

Clients like br2684 will call register_atmdevice_notifier() to be notified of
changes. Drivers will call atm_dev_signal_change() to notify clients like
br2684 of the change.

On DSL and ATM devices it's usefull to have a know if you have a carrier
signal. netdevice LOWER_UP changes can be propagated to userspace via netlink
monitor.

Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmdev.h | 17 +++++++++++++++++
 net/atm/common.c       | 30 ++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'net')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 817b23705c91..f6481daf6e52 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -431,6 +431,14 @@ struct atm_dev *atm_dev_register(const char *type,const struct atmdev_ops *ops,
     int number,unsigned long *flags); /* number == -1: pick first available */
 struct atm_dev *atm_dev_lookup(int number);
 void atm_dev_deregister(struct atm_dev *dev);
+
+/* atm_dev_signal_change
+ *
+ * Propagate lower layer signal change in atm_dev->signal to netdevice.
+ * The event will be sent via a notifier call chain.
+ */
+void atm_dev_signal_change(struct atm_dev *dev, char signal);
+
 void vcc_insert_socket(struct sock *sk);
 
 
@@ -510,6 +518,15 @@ void register_atm_ioctl(struct atm_ioctl *);
  */
 void deregister_atm_ioctl(struct atm_ioctl *);
 
+
+/* register_atmdevice_notifier - register atm_dev notify events
+ *
+ * Clients like br2684 will register notify events
+ * Currently we notify of signal found/lost
+ */
+int register_atmdevice_notifier(struct notifier_block *nb);
+void unregister_atmdevice_notifier(struct notifier_block *nb);
+
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/net/atm/common.c b/net/atm/common.c
index b43feb1a3995..940404a73b3d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -37,6 +37,8 @@ EXPORT_SYMBOL(vcc_hash);
 DEFINE_RWLOCK(vcc_sklist_lock);
 EXPORT_SYMBOL(vcc_sklist_lock);
 
+static ATOMIC_NOTIFIER_HEAD(atm_dev_notify_chain);
+
 static void __vcc_insert_socket(struct sock *sk)
 {
 	struct atm_vcc *vcc = atm_sk(sk);
@@ -212,6 +214,22 @@ void vcc_release_async(struct atm_vcc *vcc, int reply)
 }
 EXPORT_SYMBOL(vcc_release_async);
 
+void atm_dev_signal_change(struct atm_dev *dev, char signal)
+{
+	pr_debug("%s signal=%d dev=%p number=%d dev->signal=%d\n",
+		__func__, signal, dev, dev->number, dev->signal);
+
+	/* atm driver sending invalid signal */
+	WARN_ON(signal < ATM_PHY_SIG_LOST || signal > ATM_PHY_SIG_FOUND);
+
+	if (dev->signal == signal)
+		return; /* no change */
+
+	dev->signal = signal;
+
+	atomic_notifier_call_chain(&atm_dev_notify_chain, signal, dev);
+}
+EXPORT_SYMBOL(atm_dev_signal_change);
 
 void atm_dev_release_vccs(struct atm_dev *dev)
 {
@@ -781,6 +799,18 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 	return vcc->dev->ops->getsockopt(vcc, level, optname, optval, len);
 }
 
+int register_atmdevice_notifier(struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&atm_dev_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_atmdevice_notifier);
+
+void unregister_atmdevice_notifier(struct notifier_block *nb)
+{
+	atomic_notifier_chain_unregister(&atm_dev_notify_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_atmdevice_notifier);
+
 static int __init atm_init(void)
 {
 	int error;
-- 
cgit v1.2.3-58-ga151


From 005066122b58f3b350736cc896af46aea2e32d23 Mon Sep 17 00:00:00 2001
From: Karl Hiramoto <karl@hiramoto.org>
Date: Thu, 8 Jul 2010 20:55:31 +0000
Subject: atm/br2684: register notifier event for carrier signal changes.

When a signal change event occurs call netif_carrier_on/off.

Signed-off-by: Karl Hiramoto <karl@hiramoto.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/br2684.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 64 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index 6719af6a59fa..651babdfab38 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -139,6 +139,43 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
 	return NULL;
 }
 
+static int atm_dev_event(struct notifier_block *this, unsigned long event,
+		 void *arg)
+{
+	struct atm_dev *atm_dev = arg;
+	struct list_head *lh;
+	struct net_device *net_dev;
+	struct br2684_vcc *brvcc;
+	struct atm_vcc *atm_vcc;
+	unsigned long flags;
+
+	pr_debug("event=%ld dev=%p\n", event, atm_dev);
+
+	read_lock_irqsave(&devs_lock, flags);
+	list_for_each(lh, &br2684_devs) {
+		net_dev = list_entry_brdev(lh);
+
+		list_for_each_entry(brvcc, &BRPRIV(net_dev)->brvccs, brvccs) {
+			atm_vcc = brvcc->atmvcc;
+			if (atm_vcc && brvcc->atmvcc->dev == atm_dev) {
+
+				if (atm_vcc->dev->signal == ATM_PHY_SIG_LOST)
+					netif_carrier_off(net_dev);
+				else
+					netif_carrier_on(net_dev);
+
+			}
+		}
+	}
+	read_unlock_irqrestore(&devs_lock, flags);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block atm_dev_notifier = {
+	.notifier_call = atm_dev_event,
+};
+
 /* chained vcc->pop function.  Check if we should wake the netif_queue */
 static void br2684_pop(struct atm_vcc *vcc, struct sk_buff *skb)
 {
@@ -362,6 +399,12 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 			unregister_netdev(net_dev);
 			free_netdev(net_dev);
 		}
+		read_lock_irq(&devs_lock);
+		if (list_empty(&br2684_devs)) {
+			/* last br2684 device */
+			unregister_atmdevice_notifier(&atm_dev_notifier);
+		}
+		read_unlock_irq(&devs_lock);
 		return;
 	}
 
@@ -530,6 +573,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 
 		br2684_push(atmvcc, skb);
 	}
+
+	/* initialize netdev carrier state */
+	if (atmvcc->dev->signal == ATM_PHY_SIG_LOST)
+		netif_carrier_off(net_dev);
+	else
+		netif_carrier_on(net_dev);
+
 	__module_get(THIS_MODULE);
 	return 0;
 
@@ -620,9 +670,16 @@ static int br2684_create(void __user *arg)
 	}
 
 	write_lock_irq(&devs_lock);
+
 	brdev->payload = payload;
-	brdev->number = list_empty(&br2684_devs) ? 1 :
-	    BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
+
+	if (list_empty(&br2684_devs)) {
+		/* 1st br2684 device */
+		register_atmdevice_notifier(&atm_dev_notifier);
+		brdev->number = 1;
+	} else
+		brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1;
+
 	list_add_tail(&brdev->br2684_devs, &br2684_devs);
 	write_unlock_irq(&devs_lock);
 	return 0;
@@ -772,6 +829,11 @@ static void __exit br2684_exit(void)
 	remove_proc_entry("br2684", atm_proc_root);
 #endif
 
+
+	/* if not already empty */
+	if (!list_empty(&br2684_devs))
+		unregister_atmdevice_notifier(&atm_dev_notifier);
+
 	while (!list_empty(&br2684_devs)) {
 		net_dev = list_entry_brdev(br2684_devs.next);
 		brdev = BRPRIV(net_dev);
-- 
cgit v1.2.3-58-ga151


From 3cfde79c6c7c8002375c4a8e5be7f602fbb9675d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 9 Jul 2010 09:11:52 +0000
Subject: net: Get rid of rtnl_link_stats64 / net_device_stats union

In commit be1f3c2c027cc5ad735df6a45a542ed1db7ec48b "net: Enable 64-bit
net device statistics on 32-bit architectures" I redefined struct
net_device_stats so that it could be used in a union with struct
rtnl_link_stats64, avoiding the need for explicit copying or
conversion between the two.  However, this is unsafe because there is
no locking required and no lock consistently held around calls to
dev_get_stats() and use of the statistics structure it returns.

In commit 28172739f0a276eb8d6ca917b3974c2edb036da3 "net: fix 64 bit
counters on 32 bit arches" Eric Dumazet dealt with that problem by
requiring callers of dev_get_stats() to provide storage for the
result.  This means that the net_device::stats64 field and the padding
in struct net_device_stats are now redundant, so remove them.

Update the comment on net_device_ops::ndo_get_stats64 to reflect its
new usage.

Change dev_txq_stats_fold() to use struct rtnl_link_stats64, since
that is what all its callers are really using and it is no longer
going to be compatible with struct net_device_stats.

Eric Dumazet suggested the separate function for the structure
conversion.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c     |  2 +-
 include/linux/netdevice.h | 70 +++++++++++++++++++----------------------------
 net/8021q/vlan_dev.c      |  2 +-
 net/core/dev.c            | 31 +++++++++++++++++----
 4 files changed, 56 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 6112f1498940..1b28aaec0a5a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -436,7 +436,7 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
+	dev_txq_stats_fold(dev, stats);
 
 	if (vlan->rx_stats) {
 		struct macvlan_rx_stats *p, accum = {0};
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8018f6bf3051..17e95e37aed9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -162,42 +162,32 @@ static inline bool dev_xmit_complete(int rc)
 /*
  *	Old network device statistics. Fields are native words
  *	(unsigned long) so they can be read and written atomically.
- *	Each field is padded to 64 bits for compatibility with
- *	rtnl_link_stats64.
  */
 
-#if BITS_PER_LONG == 64
-#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name
-#elif defined(__LITTLE_ENDIAN)
-#define NET_DEVICE_STATS_DEFINE(name)	unsigned long name, pad_ ## name
-#else
-#define NET_DEVICE_STATS_DEFINE(name)	unsigned long pad_ ## name, name
-#endif
-
 struct net_device_stats {
-	NET_DEVICE_STATS_DEFINE(rx_packets);
-	NET_DEVICE_STATS_DEFINE(tx_packets);
-	NET_DEVICE_STATS_DEFINE(rx_bytes);
-	NET_DEVICE_STATS_DEFINE(tx_bytes);
-	NET_DEVICE_STATS_DEFINE(rx_errors);
-	NET_DEVICE_STATS_DEFINE(tx_errors);
-	NET_DEVICE_STATS_DEFINE(rx_dropped);
-	NET_DEVICE_STATS_DEFINE(tx_dropped);
-	NET_DEVICE_STATS_DEFINE(multicast);
-	NET_DEVICE_STATS_DEFINE(collisions);
-	NET_DEVICE_STATS_DEFINE(rx_length_errors);
-	NET_DEVICE_STATS_DEFINE(rx_over_errors);
-	NET_DEVICE_STATS_DEFINE(rx_crc_errors);
-	NET_DEVICE_STATS_DEFINE(rx_frame_errors);
-	NET_DEVICE_STATS_DEFINE(rx_fifo_errors);
-	NET_DEVICE_STATS_DEFINE(rx_missed_errors);
-	NET_DEVICE_STATS_DEFINE(tx_aborted_errors);
-	NET_DEVICE_STATS_DEFINE(tx_carrier_errors);
-	NET_DEVICE_STATS_DEFINE(tx_fifo_errors);
-	NET_DEVICE_STATS_DEFINE(tx_heartbeat_errors);
-	NET_DEVICE_STATS_DEFINE(tx_window_errors);
-	NET_DEVICE_STATS_DEFINE(rx_compressed);
-	NET_DEVICE_STATS_DEFINE(tx_compressed);
+	unsigned long	rx_packets;
+	unsigned long	tx_packets;
+	unsigned long	rx_bytes;
+	unsigned long	tx_bytes;
+	unsigned long	rx_errors;
+	unsigned long	tx_errors;
+	unsigned long	rx_dropped;
+	unsigned long	tx_dropped;
+	unsigned long	multicast;
+	unsigned long	collisions;
+	unsigned long	rx_length_errors;
+	unsigned long	rx_over_errors;
+	unsigned long	rx_crc_errors;
+	unsigned long	rx_frame_errors;
+	unsigned long	rx_fifo_errors;
+	unsigned long	rx_missed_errors;
+	unsigned long	tx_aborted_errors;
+	unsigned long	tx_carrier_errors;
+	unsigned long	tx_fifo_errors;
+	unsigned long	tx_heartbeat_errors;
+	unsigned long	tx_window_errors;
+	unsigned long	rx_compressed;
+	unsigned long	tx_compressed;
 };
 
 #endif  /*  __KERNEL__  */
@@ -666,14 +656,13 @@ struct netdev_rx_queue {
  *	Callback uses when the transmitter has not made any progress
  *	for dev->watchdog ticks.
  *
- * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev
+ * struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
  *                      struct rtnl_link_stats64 *storage);
  * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
  *	Called when a user wants to get the network device usage
  *	statistics. Drivers must do one of the following:
- *	1. Define @ndo_get_stats64 to update a rtnl_link_stats64 structure
- *	   (which should normally be dev->stats64) and return a ponter to
- *	   it. The structure must not be changed asynchronously.
+ *	1. Define @ndo_get_stats64 to fill in a zero-initialised
+ *	   rtnl_link_stats64 structure passed by the caller.
  *	2. Define @ndo_get_stats to update a net_device_stats structure
  *	   (which should normally be dev->stats) and return a pointer to
  *	   it. The structure may be changed asynchronously only if each
@@ -888,10 +877,7 @@ struct net_device {
 	int			ifindex;
 	int			iflink;
 
-	union {
-		struct rtnl_link_stats64 stats64;
-		struct net_device_stats stats;
-	};
+	struct net_device_stats	stats;
 
 #ifdef CONFIG_WIRELESS_EXT
 	/* List of functions to handle Wireless Extensions (instead of ioctl).
@@ -2147,7 +2133,7 @@ extern void		dev_mcast_init(void);
 extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 						     struct rtnl_link_stats64 *storage);
 extern void		dev_txq_stats_fold(const struct net_device *dev,
-					   struct net_device_stats *stats);
+					   struct rtnl_link_stats64 *stats);
 
 extern int		netdev_max_backlog;
 extern int		netdev_tstamp_prequeue;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index a1b8171cfa7b..7cb285f96b99 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -805,7 +805,7 @@ static u32 vlan_ethtool_get_flags(struct net_device *dev)
 
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
-	dev_txq_stats_fold(dev, (struct net_device_stats *)stats);
+	dev_txq_stats_fold(dev, stats);
 
 	if (vlan_dev_info(dev)->vlan_rx_stats) {
 		struct vlan_rx_stats *p, accum = {0};
diff --git a/net/core/dev.c b/net/core/dev.c
index eb4201cf9c8c..79ee26ef5095 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5274,10 +5274,10 @@ void netdev_run_todo(void)
 /**
  *	dev_txq_stats_fold - fold tx_queues stats
  *	@dev: device to get statistics from
- *	@stats: struct net_device_stats to hold results
+ *	@stats: struct rtnl_link_stats64 to hold results
  */
 void dev_txq_stats_fold(const struct net_device *dev,
-			struct net_device_stats *stats)
+			struct rtnl_link_stats64 *stats)
 {
 	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
 	unsigned int i;
@@ -5297,6 +5297,27 @@ void dev_txq_stats_fold(const struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_txq_stats_fold);
 
+/* Convert net_device_stats to rtnl_link_stats64.  They have the same
+ * fields in the same order, with only the type differing.
+ */
+static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
+				    const struct net_device_stats *netdev_stats)
+{
+#if BITS_PER_LONG == 64
+        BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
+        memcpy(stats64, netdev_stats, sizeof(*stats64));
+#else
+	size_t i, n = sizeof(*stats64) / sizeof(u64);
+	const unsigned long *src = (const unsigned long *)netdev_stats;
+	u64 *dst = (u64 *)stats64;
+
+	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
+		     sizeof(*stats64) / sizeof(u64));
+	for (i = 0; i < n; i++)
+		dst[i] = src[i];
+#endif
+}
+
 /**
  *	dev_get_stats	- get network device statistics
  *	@dev: device to get statistics from
@@ -5317,11 +5338,11 @@ const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 		return ops->ndo_get_stats64(dev, storage);
 	}
 	if (ops->ndo_get_stats) {
-		memcpy(storage, ops->ndo_get_stats(dev), sizeof(*storage));
+		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
 		return storage;
 	}
-	memcpy(storage, &dev->stats, sizeof(*storage));
-	dev_txq_stats_fold(dev, (struct net_device_stats *)storage);
+	netdev_stats_to_stats64(storage, &dev->stats);
+	dev_txq_stats_fold(dev, storage);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
-- 
cgit v1.2.3-58-ga151


From d77535162e736c47978d5c01469c56e1781dc91b Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 9 Jul 2010 09:12:41 +0000
Subject: net: Document that dev_get_stats() returns the given pointer

Document that dev_get_stats() returns the same stats pointer it was
given.  Remove const qualification from the returned pointer since the
caller may do what it likes with that structure.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++--
 net/core/dev.c            | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 17e95e37aed9..c4fedf000541 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2130,8 +2130,8 @@ extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
 extern void		dev_load(struct net *net, const char *name);
 extern void		dev_mcast_init(void);
-extern const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-						     struct rtnl_link_stats64 *storage);
+extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					       struct rtnl_link_stats64 *storage);
 extern void		dev_txq_stats_fold(const struct net_device *dev,
 					   struct rtnl_link_stats64 *stats);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 79ee26ef5095..e2b9fa2c917e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5323,13 +5323,13 @@ static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
  *	@dev: device to get statistics from
  *	@storage: place to store stats
  *
- *	Get network statistics from device. The device driver may provide
- *	its own method by setting dev->netdev_ops->get_stats64 or
- *	dev->netdev_ops->get_stats; otherwise the internal statistics
- *	structure is used.
+ *	Get network statistics from device. Return @storage.
+ *	The device driver may provide its own method by setting
+ *	dev->netdev_ops->get_stats64 or dev->netdev_ops->get_stats;
+ *	otherwise the internal statistics structure is used.
  */
-const struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
-					      struct rtnl_link_stats64 *storage)
+struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
+					struct rtnl_link_stats64 *storage)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
 
-- 
cgit v1.2.3-58-ga151


From 4bc2f18ba4f22a90ab593c0a580fc9a19c4777b6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 9 Jul 2010 21:22:10 +0000
Subject: net/ipv4: EXPORT_SYMBOL cleanups

CodingStyle cleanups

EXPORT_SYMBOL should immediately follow the symbol declaration.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c                  | 18 +++++++-----------
 net/ipv4/datagram.c             |  2 --
 net/ipv4/fib_frontend.c         |  7 +++----
 net/ipv4/icmp.c                 |  7 +++----
 net/ipv4/igmp.c                 |  9 ++++-----
 net/ipv4/inet_connection_sock.c | 19 +------------------
 net/ipv4/inet_fragment.c        |  1 -
 net/ipv4/inet_hashtables.c      |  4 ----
 net/ipv4/ip_fragment.c          |  3 +--
 net/ipv4/ip_output.c            |  9 +++------
 net/ipv4/netfilter/ipt_REJECT.c |  2 +-
 net/ipv4/protocol.c             |  3 +--
 net/ipv4/route.c                |  7 ++-----
 net/ipv4/tcp.c                  | 35 ++++++++++++-----------------------
 net/ipv4/tcp_input.c            | 18 ++++++++----------
 net/ipv4/tcp_ipv4.c             | 35 ++++++++++++-----------------------
 net/ipv4/tcp_minisocks.c        |  9 +++------
 net/ipv4/tcp_output.c           | 12 +++++-------
 net/ipv4/tcp_timer.c            |  1 -
 net/ipv4/tunnel4.c              |  2 --
 net/ipv4/udplite.c              |  3 +--
 net/ipv4/xfrm4_input.c          |  1 -
 22 files changed, 67 insertions(+), 140 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 09ead1baa99e..96c1955b3e2f 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -116,6 +116,7 @@
 #if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
 #include <net/atmclip.h>
 struct neigh_table *clip_tbl_hook;
+EXPORT_SYMBOL(clip_tbl_hook);
 #endif
 
 #include <asm/system.h>
@@ -169,6 +170,7 @@ const struct neigh_ops arp_broken_ops = {
 	.hh_output =		dev_queue_xmit,
 	.queue_xmit =		dev_queue_xmit,
 };
+EXPORT_SYMBOL(arp_broken_ops);
 
 struct neigh_table arp_tbl = {
 	.family =	AF_INET,
@@ -198,6 +200,7 @@ struct neigh_table arp_tbl = {
 	.gc_thresh2 =	512,
 	.gc_thresh3 =	1024,
 };
+EXPORT_SYMBOL(arp_tbl);
 
 int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
 {
@@ -499,6 +502,7 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
 		kfree_skb(skb);
 	return 1;
 }
+EXPORT_SYMBOL(arp_find);
 
 /* END OF OBSOLETE FUNCTIONS */
 
@@ -700,6 +704,7 @@ out:
 	kfree_skb(skb);
 	return NULL;
 }
+EXPORT_SYMBOL(arp_create);
 
 /*
  *	Send an arp packet.
@@ -709,6 +714,7 @@ void arp_xmit(struct sk_buff *skb)
 	/* Send it off, maybe filter it using firewalling first.  */
 	NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit);
 }
+EXPORT_SYMBOL(arp_xmit);
 
 /*
  *	Create and send an arp packet.
@@ -735,6 +741,7 @@ void arp_send(int type, int ptype, __be32 dest_ip,
 
 	arp_xmit(skb);
 }
+EXPORT_SYMBOL(arp_send);
 
 /*
  *	Process an arp request.
@@ -1452,14 +1459,3 @@ static int __init arp_proc_init(void)
 }
 
 #endif /* CONFIG_PROC_FS */
-
-EXPORT_SYMBOL(arp_broken_ops);
-EXPORT_SYMBOL(arp_find);
-EXPORT_SYMBOL(arp_create);
-EXPORT_SYMBOL(arp_xmit);
-EXPORT_SYMBOL(arp_send);
-EXPORT_SYMBOL(arp_tbl);
-
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-EXPORT_SYMBOL(clip_tbl_hook);
-#endif
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index fe3daa7f07a9..f0550941df7b 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -72,6 +72,4 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	sk_dst_set(sk, &rt->dst);
 	return(0);
 }
-
 EXPORT_SYMBOL(ip4_datagram_connect);
-
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index e830f7a123bd..a43968918350 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -175,6 +175,7 @@ out:
 	fib_res_put(&res);
 	return dev;
 }
+EXPORT_SYMBOL(ip_dev_find);
 
 /*
  * Find address type as if only "dev" was present in the system. If
@@ -214,12 +215,14 @@ unsigned int inet_addr_type(struct net *net, __be32 addr)
 {
 	return __inet_dev_addr_type(net, NULL, addr);
 }
+EXPORT_SYMBOL(inet_addr_type);
 
 unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
 				__be32 addr)
 {
        return __inet_dev_addr_type(net, dev, addr);
 }
+EXPORT_SYMBOL(inet_dev_addr_type);
 
 /* Given (packet source, input interface) and optional (dst, oif, tos):
    - (main) check, that source is valid i.e. not broadcast or our local
@@ -1077,7 +1080,3 @@ void __init ip_fib_init(void)
 
 	fib_hash_init();
 }
-
-EXPORT_SYMBOL(inet_addr_type);
-EXPORT_SYMBOL(inet_dev_addr_type);
-EXPORT_SYMBOL(ip_dev_find);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 7569b21a3a2d..a0d847c7cba5 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -181,6 +181,7 @@ const struct icmp_err icmp_err_convert[] = {
 		.fatal = 1,
 	},
 };
+EXPORT_SYMBOL(icmp_err_convert);
 
 /*
  *	ICMP control array. This specifies what to do with each ICMP.
@@ -267,6 +268,7 @@ int xrlim_allow(struct dst_entry *dst, int timeout)
 	dst->rate_tokens = token;
 	return rc;
 }
+EXPORT_SYMBOL(xrlim_allow);
 
 static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
 		int type, int code)
@@ -647,6 +649,7 @@ out_unlock:
 	icmp_xmit_unlock(sk);
 out:;
 }
+EXPORT_SYMBOL(icmp_send);
 
 
 /*
@@ -1214,7 +1217,3 @@ int __init icmp_init(void)
 {
 	return register_pernet_subsys(&icmp_sk_ops);
 }
-
-EXPORT_SYMBOL(icmp_err_convert);
-EXPORT_SYMBOL(icmp_send);
-EXPORT_SYMBOL(xrlim_allow);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b5580d422994..a1ad0e7180d2 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1244,6 +1244,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 out:
 	return;
 }
+EXPORT_SYMBOL(ip_mc_inc_group);
 
 /*
  *	Resend IGMP JOIN report; used for bonding.
@@ -1266,6 +1267,7 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
 	igmp_ifc_event(in_dev);
 #endif
 }
+EXPORT_SYMBOL(ip_mc_rejoin_group);
 
 /*
  *	A socket has left a multicast group on device dev
@@ -1296,6 +1298,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 		}
 	}
 }
+EXPORT_SYMBOL(ip_mc_dec_group);
 
 /* Device changing type */
 
@@ -1804,6 +1807,7 @@ done:
 	rtnl_unlock();
 	return err;
 }
+EXPORT_SYMBOL(ip_mc_join_group);
 
 static void ip_sf_socklist_reclaim(struct rcu_head *rp)
 {
@@ -2676,8 +2680,3 @@ int __init igmp_mc_proc_init(void)
 	return register_pernet_subsys(&igmp_net_ops);
 }
 #endif
-
-EXPORT_SYMBOL(ip_mc_dec_group);
-EXPORT_SYMBOL(ip_mc_inc_group);
-EXPORT_SYMBOL(ip_mc_join_group);
-EXPORT_SYMBOL(ip_mc_rejoin_group);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 57c9e4d7b805..7174370b1195 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -84,7 +84,6 @@ int inet_csk_bind_conflict(const struct sock *sk,
 	}
 	return node != NULL;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_bind_conflict);
 
 /* Obtain a reference to a local port for the given sock,
@@ -212,7 +211,6 @@ fail:
 	local_bh_enable();
 	return ret;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_get_port);
 
 /*
@@ -305,7 +303,6 @@ out_err:
 	*err = error;
 	goto out;
 }
-
 EXPORT_SYMBOL(inet_csk_accept);
 
 /*
@@ -327,7 +324,6 @@ void inet_csk_init_xmit_timers(struct sock *sk,
 	setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
 	icsk->icsk_pending = icsk->icsk_ack.pending = 0;
 }
-
 EXPORT_SYMBOL(inet_csk_init_xmit_timers);
 
 void inet_csk_clear_xmit_timers(struct sock *sk)
@@ -340,21 +336,18 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
 	sk_stop_timer(sk, &icsk->icsk_delack_timer);
 	sk_stop_timer(sk, &sk->sk_timer);
 }
-
 EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
 
 void inet_csk_delete_keepalive_timer(struct sock *sk)
 {
 	sk_stop_timer(sk, &sk->sk_timer);
 }
-
 EXPORT_SYMBOL(inet_csk_delete_keepalive_timer);
 
 void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
 {
 	sk_reset_timer(sk, &sk->sk_timer, jiffies + len);
 }
-
 EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
 
 struct dst_entry *inet_csk_route_req(struct sock *sk,
@@ -391,7 +384,6 @@ no_route:
 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
 	return NULL;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_route_req);
 
 static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
@@ -433,7 +425,6 @@ struct request_sock *inet_csk_search_req(const struct sock *sk,
 
 	return req;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_search_req);
 
 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
@@ -447,11 +438,11 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
 	inet_csk_reqsk_queue_added(sk, timeout);
 }
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
 /* Only thing we need from tcp.h */
 extern int sysctl_tcp_synack_retries;
 
-EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
 /* Decide when to expire the request and when to resend SYN-ACK */
 static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
@@ -569,7 +560,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	if (lopt->qlen)
 		inet_csk_reset_keepalive_timer(parent, interval);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
 
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
@@ -599,7 +589,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 	}
 	return newsk;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_clone);
 
 /*
@@ -630,7 +619,6 @@ void inet_csk_destroy_sock(struct sock *sk)
 	percpu_counter_dec(sk->sk_prot->orphan_count);
 	sock_put(sk);
 }
-
 EXPORT_SYMBOL(inet_csk_destroy_sock);
 
 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
@@ -665,7 +653,6 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
 	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
 	return -EADDRINUSE;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);
 
 /*
@@ -720,7 +707,6 @@ void inet_csk_listen_stop(struct sock *sk)
 	}
 	WARN_ON(sk->sk_ack_backlog);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
 
 void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
@@ -732,7 +718,6 @@ void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
 	sin->sin_addr.s_addr	= inet->inet_daddr;
 	sin->sin_port		= inet->inet_dport;
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr);
 
 #ifdef CONFIG_COMPAT
@@ -747,7 +732,6 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
 	return icsk->icsk_af_ops->getsockopt(sk, level, optname,
 					     optval, optlen);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
 
 int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
@@ -761,6 +745,5 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
 	return icsk->icsk_af_ops->setsockopt(sk, level, optname,
 					     optval, optlen);
 }
-
 EXPORT_SYMBOL_GPL(inet_csk_compat_setsockopt);
 #endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a2ca6aed763b..5ff2a51b6d0c 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -114,7 +114,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 		fq->last_in |= INET_FRAG_COMPLETE;
 	}
 }
-
 EXPORT_SYMBOL(inet_frag_kill);
 
 static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d3e160a88219..fb7ad5a21ff3 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -99,7 +99,6 @@ void inet_put_port(struct sock *sk)
 	__inet_put_port(sk);
 	local_bh_enable();
 }
-
 EXPORT_SYMBOL(inet_put_port);
 
 void __inet_inherit_port(struct sock *sk, struct sock *child)
@@ -116,7 +115,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child)
 	inet_csk(child)->icsk_bind_hash = tb;
 	spin_unlock(&head->lock);
 }
-
 EXPORT_SYMBOL_GPL(__inet_inherit_port);
 
 static inline int compute_score(struct sock *sk, struct net *net,
@@ -546,7 +544,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
 	return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),
 			__inet_check_established, __inet_hash_nolisten);
 }
-
 EXPORT_SYMBOL_GPL(inet_hash_connect);
 
 void inet_hashinfo_init(struct inet_hashinfo *h)
@@ -560,5 +557,4 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
 				      i + LISTENING_NULLS_BASE);
 		}
 }
-
 EXPORT_SYMBOL_GPL(inet_hashinfo_init);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index dd0dbf0c6b7f..b7c41654dde5 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -632,6 +632,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
 	kfree_skb(skb);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(ip_defrag);
 
 #ifdef CONFIG_SYSCTL
 static int zero;
@@ -785,5 +786,3 @@ void __init ipfrag_init(void)
 	ip4_frags.secret_interval = 10 * 60 * HZ;
 	inet_frags_init(&ip4_frags);
 }
-
-EXPORT_SYMBOL(ip_defrag);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 663cb2acb39e..6652bd9da676 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -89,6 +89,7 @@ __inline__ void ip_send_check(struct iphdr *iph)
 	iph->check = 0;
 	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
 }
+EXPORT_SYMBOL(ip_send_check);
 
 int __ip_local_out(struct sk_buff *skb)
 {
@@ -172,7 +173,6 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
 	/* Send it out. */
 	return ip_local_out(skb);
 }
-
 EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
 
 static inline int ip_finish_output2(struct sk_buff *skb)
@@ -403,6 +403,7 @@ no_route:
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
 }
+EXPORT_SYMBOL(ip_queue_xmit);
 
 
 static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
@@ -696,7 +697,6 @@ fail:
 	IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 	return err;
 }
-
 EXPORT_SYMBOL(ip_fragment);
 
 int
@@ -715,6 +715,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
 	}
 	return 0;
 }
+EXPORT_SYMBOL(ip_generic_getfrag);
 
 static inline __wsum
 csum_page(struct page *page, int offset, int copy)
@@ -1447,7 +1448,3 @@ void __init ip_init(void)
 	igmp_mc_proc_init();
 #endif
 }
-
-EXPORT_SYMBOL(ip_generic_getfrag);
-EXPORT_SYMBOL(ip_queue_xmit);
-EXPORT_SYMBOL(ip_send_check);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index f5f4a888e4ec..bbbd2736c549 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -109,7 +109,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
 		addr_type = RTN_LOCAL;
 
 	/* ip_route_me_harder expects skb->dst to be set */
-	skb_dst_set(nskb, dst_clone(skb_dst(oldskb)));
+	skb_dst_set_noref(nskb, skb_dst(oldskb));
 
 	if (ip_route_me_harder(nskb, addr_type))
 		goto free_nskb;
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index 542f22fc98b3..f2d297351405 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -52,6 +52,7 @@ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 
 	return ret;
 }
+EXPORT_SYMBOL(inet_add_protocol);
 
 /*
  *	Remove a protocol from the hash tables.
@@ -76,6 +77,4 @@ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 
 	return ret;
 }
-
-EXPORT_SYMBOL(inet_add_protocol);
 EXPORT_SYMBOL(inet_del_protocol);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 03430de46166..562ce92de2a6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1324,6 +1324,7 @@ void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
 
 	ip_select_fb_ident(iph);
 }
+EXPORT_SYMBOL(__ip_select_ident);
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
@@ -2735,7 +2736,6 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 slow_output:
 	return ip_route_output_slow(net, rp, flp);
 }
-
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -2819,13 +2819,13 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
 
 int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
 {
 	return ip_route_output_flow(net, rp, flp, NULL, 0);
 }
+EXPORT_SYMBOL(ip_route_output_key);
 
 static int rt_fill_info(struct net *net,
 			struct sk_buff *skb, u32 pid, u32 seq, int event,
@@ -3363,6 +3363,3 @@ void __init ip_static_sysctl_init(void)
 	register_sysctl_paths(ipv4_path, ipv4_skeleton);
 }
 #endif
-
-EXPORT_SYMBOL(__ip_select_ident);
-EXPORT_SYMBOL(ip_route_output_key);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4e6ddfbab09e..b8601b7683a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -315,7 +315,6 @@ struct tcp_splice_state {
  * is strict, actions are advisory and have some latency.
  */
 int tcp_memory_pressure __read_mostly;
-
 EXPORT_SYMBOL(tcp_memory_pressure);
 
 void tcp_enter_memory_pressure(struct sock *sk)
@@ -325,7 +324,6 @@ void tcp_enter_memory_pressure(struct sock *sk)
 		tcp_memory_pressure = 1;
 	}
 }
-
 EXPORT_SYMBOL(tcp_enter_memory_pressure);
 
 /* Convert seconds to retransmits based on initial and max timeout */
@@ -460,6 +458,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	}
 	return mask;
 }
+EXPORT_SYMBOL(tcp_poll);
 
 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
@@ -508,6 +507,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 	return put_user(answ, (int __user *)arg);
 }
+EXPORT_SYMBOL(tcp_ioctl);
 
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
@@ -675,6 +675,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 
 	return ret;
 }
+EXPORT_SYMBOL(tcp_splice_read);
 
 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
 {
@@ -873,6 +874,7 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
 	release_sock(sk);
 	return res;
 }
+EXPORT_SYMBOL(tcp_sendpage);
 
 #define TCP_PAGE(sk)	(sk->sk_sndmsg_page)
 #define TCP_OFF(sk)	(sk->sk_sndmsg_off)
@@ -1121,6 +1123,7 @@ out_err:
 	release_sock(sk);
 	return err;
 }
+EXPORT_SYMBOL(tcp_sendmsg);
 
 /*
  *	Handle reading urgent data. BSD has very simple semantics for
@@ -1380,6 +1383,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		tcp_cleanup_rbuf(sk, copied);
 	return copied;
 }
+EXPORT_SYMBOL(tcp_read_sock);
 
 /*
  *	This routine copies from a sock struct into the user buffer.
@@ -1774,6 +1778,7 @@ recv_urg:
 	err = tcp_recv_urg(sk, msg, len, flags);
 	goto out;
 }
+EXPORT_SYMBOL(tcp_recvmsg);
 
 void tcp_set_state(struct sock *sk, int state)
 {
@@ -1866,6 +1871,7 @@ void tcp_shutdown(struct sock *sk, int how)
 			tcp_send_fin(sk);
 	}
 }
+EXPORT_SYMBOL(tcp_shutdown);
 
 void tcp_close(struct sock *sk, long timeout)
 {
@@ -2029,6 +2035,7 @@ out:
 	local_bh_enable();
 	sock_put(sk);
 }
+EXPORT_SYMBOL(tcp_close);
 
 /* These states need RST on ABORT according to RFC793 */
 
@@ -2102,6 +2109,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	sk->sk_error_report(sk);
 	return err;
 }
+EXPORT_SYMBOL(tcp_disconnect);
 
 /*
  *	Socket option code for TCP.
@@ -2400,6 +2408,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 						     optval, optlen);
 	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
 }
+EXPORT_SYMBOL(tcp_setsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
@@ -2410,7 +2419,6 @@ int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
 						  optval, optlen);
 	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
 }
-
 EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
@@ -2476,7 +2484,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	info->tcpi_total_retrans = tp->total_retrans;
 }
-
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
 static int do_tcp_getsockopt(struct sock *sk, int level,
@@ -2615,6 +2622,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 						     optval, optlen);
 	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
 }
+EXPORT_SYMBOL(tcp_getsockopt);
 
 #ifdef CONFIG_COMPAT
 int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -2625,7 +2633,6 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 						  optval, optlen);
 	return do_tcp_getsockopt(sk, level, optname, optval, optlen);
 }
-
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
@@ -2862,7 +2869,6 @@ void tcp_free_md5sig_pool(void)
 	if (pool)
 		__tcp_free_md5sig_pool(pool);
 }
-
 EXPORT_SYMBOL(tcp_free_md5sig_pool);
 
 static struct tcp_md5sig_pool * __percpu *
@@ -2938,7 +2944,6 @@ retry:
 	}
 	return pool;
 }
-
 EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
 
 
@@ -2990,7 +2995,6 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
 	th->check = old_checksum;
 	return err;
 }
-
 EXPORT_SYMBOL(tcp_md5_hash_header);
 
 int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
@@ -3024,7 +3028,6 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 
 	return 0;
 }
-
 EXPORT_SYMBOL(tcp_md5_hash_skb_data);
 
 int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
@@ -3034,7 +3037,6 @@ int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
 	sg_init_one(&sg, key->key, key->keylen);
 	return crypto_hash_update(&hp->md5_desc, &sg, key->keylen);
 }
-
 EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
@@ -3306,16 +3308,3 @@ void __init tcp_init(void)
 	tcp_secret_retiring = &tcp_secret_two;
 	tcp_secret_secondary = &tcp_secret_two;
 }
-
-EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(tcp_disconnect);
-EXPORT_SYMBOL(tcp_getsockopt);
-EXPORT_SYMBOL(tcp_ioctl);
-EXPORT_SYMBOL(tcp_poll);
-EXPORT_SYMBOL(tcp_read_sock);
-EXPORT_SYMBOL(tcp_recvmsg);
-EXPORT_SYMBOL(tcp_sendmsg);
-EXPORT_SYMBOL(tcp_splice_read);
-EXPORT_SYMBOL(tcp_sendpage);
-EXPORT_SYMBOL(tcp_setsockopt);
-EXPORT_SYMBOL(tcp_shutdown);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 04334661fa28..3c426cb318e7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,10 +78,13 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
 int sysctl_tcp_sack __read_mostly = 1;
 int sysctl_tcp_fack __read_mostly = 1;
 int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+EXPORT_SYMBOL(sysctl_tcp_reordering);
 int sysctl_tcp_ecn __read_mostly = 2;
+EXPORT_SYMBOL(sysctl_tcp_ecn);
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 2;
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 
 int sysctl_tcp_stdurg __read_mostly;
 int sysctl_tcp_rfc1337 __read_mostly;
@@ -419,6 +422,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
 
 	inet_csk(sk)->icsk_ack.rcv_mss = hint;
 }
+EXPORT_SYMBOL(tcp_initialize_rcv_mss);
 
 /* Receiver "autotuning" code.
  *
@@ -2938,6 +2942,7 @@ void tcp_simple_retransmit(struct sock *sk)
 	}
 	tcp_xmit_retransmit_queue(sk);
 }
+EXPORT_SYMBOL(tcp_simple_retransmit);
 
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
@@ -3858,6 +3863,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 		}
 	}
 }
+EXPORT_SYMBOL(tcp_parse_options);
 
 static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
 {
@@ -3931,6 +3937,7 @@ u8 *tcp_parse_md5sig_option(struct tcphdr *th)
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_parse_md5sig_option);
 #endif
 
 static inline void tcp_store_ts_recent(struct tcp_sock *tp)
@@ -5432,6 +5439,7 @@ discard:
 	__kfree_skb(skb);
 	return 0;
 }
+EXPORT_SYMBOL(tcp_rcv_established);
 
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
@@ -5931,14 +5939,4 @@ discard:
 	}
 	return 0;
 }
-
-EXPORT_SYMBOL(sysctl_tcp_ecn);
-EXPORT_SYMBOL(sysctl_tcp_reordering);
-EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
-EXPORT_SYMBOL(tcp_parse_options);
-#ifdef CONFIG_TCP_MD5SIG
-EXPORT_SYMBOL(tcp_parse_md5sig_option);
-#endif
-EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
-EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8fa32f5ae2ce..44545e8e8c92 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -84,6 +84,7 @@
 
 int sysctl_tcp_tw_reuse __read_mostly;
 int sysctl_tcp_low_latency __read_mostly;
+EXPORT_SYMBOL(sysctl_tcp_low_latency);
 
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -100,6 +101,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr)
 #endif
 
 struct inet_hashinfo tcp_hashinfo;
+EXPORT_SYMBOL(tcp_hashinfo);
 
 static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
 {
@@ -139,7 +141,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 
 	return 0;
 }
-
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
 /* This will initiate an outgoing connection. */
@@ -267,6 +268,7 @@ failure:
 	inet->inet_dport = 0;
 	return err;
 }
+EXPORT_SYMBOL(tcp_v4_connect);
 
 /*
  * This routine does path mtu discovery as defined in RFC1191.
@@ -545,6 +547,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 
 	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
 }
+EXPORT_SYMBOL(tcp_v4_send_check);
 
 int tcp_v4_gso_send_check(struct sk_buff *skb)
 {
@@ -860,7 +863,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
 {
 	return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->inet_daddr);
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_lookup);
 
 static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
@@ -927,7 +929,6 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 	}
 	return 0;
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_do_add);
 
 static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk,
@@ -965,7 +966,6 @@ int tcp_v4_md5_do_del(struct sock *sk, __be32 addr)
 	}
 	return -ENOENT;
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_do_del);
 
 static void tcp_v4_clear_md5_list(struct sock *sk)
@@ -1138,7 +1138,6 @@ clear_hash_noput:
 	memset(md5_hash, 0, 16);
 	return 1;
 }
-
 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
 
 static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
@@ -1396,6 +1395,7 @@ drop_and_free:
 drop:
 	return 0;
 }
+EXPORT_SYMBOL(tcp_v4_conn_request);
 
 
 /*
@@ -1481,6 +1481,7 @@ exit:
 	dst_release(dst);
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 
 static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 {
@@ -1610,6 +1611,7 @@ csum_err:
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 }
+EXPORT_SYMBOL(tcp_v4_do_rcv);
 
 /*
  *	From tcp_input.c
@@ -1796,6 +1798,7 @@ int tcp_v4_remember_stamp(struct sock *sk)
 
 	return 0;
 }
+EXPORT_SYMBOL(tcp_v4_remember_stamp);
 
 int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 {
@@ -1835,6 +1838,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
 	.compat_getsockopt = compat_ip_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(ipv4_specific);
 
 #ifdef CONFIG_TCP_MD5SIG
 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
@@ -1963,7 +1967,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	percpu_counter_dec(&tcp_sockets_allocated);
 }
-
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
 #ifdef CONFIG_PROC_FS
@@ -2367,11 +2370,13 @@ int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
 		rc = -ENOMEM;
 	return rc;
 }
+EXPORT_SYMBOL(tcp_proc_register);
 
 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 {
 	proc_net_remove(net, afinfo->name);
 }
+EXPORT_SYMBOL(tcp_proc_unregister);
 
 static void get_openreq4(struct sock *sk, struct request_sock *req,
 			 struct seq_file *f, int i, int uid, int *len)
@@ -2618,6 +2623,7 @@ struct proto tcp_prot = {
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(tcp_prot);
 
 
 static int __net_init tcp_sk_init(struct net *net)
@@ -2648,20 +2654,3 @@ void __init tcp_v4_init(void)
 	if (register_pernet_subsys(&tcp_sk_ops))
 		panic("Failed to create the TCP control socket.\n");
 }
-
-EXPORT_SYMBOL(ipv4_specific);
-EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_prot);
-EXPORT_SYMBOL(tcp_v4_conn_request);
-EXPORT_SYMBOL(tcp_v4_connect);
-EXPORT_SYMBOL(tcp_v4_do_rcv);
-EXPORT_SYMBOL(tcp_v4_remember_stamp);
-EXPORT_SYMBOL(tcp_v4_send_check);
-EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
-
-#ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(tcp_proc_register);
-EXPORT_SYMBOL(tcp_proc_unregister);
-#endif
-EXPORT_SYMBOL(sysctl_tcp_low_latency);
-
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 794c2e122a41..f25b56cb85cb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -47,7 +47,6 @@ struct inet_timewait_death_row tcp_death_row = {
 	.twcal_timer	= TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
 					    (unsigned long)&tcp_death_row),
 };
-
 EXPORT_SYMBOL_GPL(tcp_death_row);
 
 static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
@@ -262,6 +261,7 @@ kill:
 	inet_twsk_put(tw);
 	return TCP_TW_SUCCESS;
 }
+EXPORT_SYMBOL(tcp_timewait_state_process);
 
 /*
  * Move a socket to time-wait or dead fin-wait-2 state.
@@ -362,7 +362,6 @@ void tcp_twsk_destructor(struct sock *sk)
 		tcp_free_md5sig_pool();
 #endif
 }
-
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
@@ -510,6 +509,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 	}
 	return newsk;
 }
+EXPORT_SYMBOL(tcp_create_openreq_child);
 
 /*
  *	Process an incoming packet for SYN_RECV sockets represented
@@ -706,6 +706,7 @@ embryonic_reset:
 	inet_csk_reqsk_queue_drop(sk, req, prev);
 	return NULL;
 }
+EXPORT_SYMBOL(tcp_check_req);
 
 /*
  * Queue segment on the new socket if the new socket is active,
@@ -737,8 +738,4 @@ int tcp_child_process(struct sock *parent, struct sock *child,
 	sock_put(child);
 	return ret;
 }
-
-EXPORT_SYMBOL(tcp_check_req);
 EXPORT_SYMBOL(tcp_child_process);
-EXPORT_SYMBOL(tcp_create_openreq_child);
-EXPORT_SYMBOL(tcp_timewait_state_process);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 25ff62e35a68..b3f6f099b1a3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -247,6 +247,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	/* Set the clamp no higher than max representable value */
 	(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
 }
+EXPORT_SYMBOL(tcp_select_initial_window);
 
 /* Chose a new window to advertise, update state in tcp_sock for the
  * socket, and return result with RFC1323 scaling applied.  The return
@@ -1189,6 +1190,7 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
 	icsk->icsk_mtup.probe_size = 0;
 }
+EXPORT_SYMBOL(tcp_mtup_init);
 
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
@@ -1232,6 +1234,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 
 	return mss_now;
 }
+EXPORT_SYMBOL(tcp_sync_mss);
 
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
@@ -2514,6 +2517,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	return skb;
 }
+EXPORT_SYMBOL(tcp_make_synack);
 
 /* Do all connect socket setups that can be done AF independent. */
 static void tcp_connect_init(struct sock *sk)
@@ -2616,6 +2620,7 @@ int tcp_connect(struct sock *sk)
 				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 	return 0;
 }
+EXPORT_SYMBOL(tcp_connect);
 
 /* Send out a delayed ack, the caller does the policy checking
  * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
@@ -2820,10 +2825,3 @@ void tcp_send_probe0(struct sock *sk)
 					  TCP_RTO_MAX);
 	}
 }
-
-EXPORT_SYMBOL(tcp_select_initial_window);
-EXPORT_SYMBOL(tcp_connect);
-EXPORT_SYMBOL(tcp_make_synack);
-EXPORT_SYMBOL(tcp_simple_retransmit);
-EXPORT_SYMBOL(tcp_sync_mss);
-EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 440a5c6004f6..808bb920c9f5 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -41,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk)
 	inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
 				  &tcp_keepalive_timer);
 }
-
 EXPORT_SYMBOL(tcp_init_xmit_timers);
 
 static void tcp_write_err(struct sock *sk)
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 3b3813cc80b9..59186ca7808a 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -48,7 +48,6 @@ err:
 
 	return ret;
 }
-
 EXPORT_SYMBOL(xfrm4_tunnel_register);
 
 int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
@@ -72,7 +71,6 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(xfrm4_tunnel_deregister);
 
 static int tunnel4_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 6610bf76369f..ab76aa928fa9 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -58,6 +58,7 @@ struct proto 	udplite_prot = {
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
 };
+EXPORT_SYMBOL(udplite_prot);
 
 static struct inet_protosw udplite4_protosw = {
 	.type		=  SOCK_DGRAM,
@@ -127,5 +128,3 @@ out_unregister_proto:
 out_register_err:
 	printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__);
 }
-
-EXPORT_SYMBOL(udplite_prot);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index ad8fbb871aa0..06814b6216dc 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -163,5 +163,4 @@ int xfrm4_rcv(struct sk_buff *skb)
 {
 	return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
 }
-
 EXPORT_SYMBOL(xfrm4_rcv);
-- 
cgit v1.2.3-58-ga151


From 9e34a5b51684bc90ac827ec4ba339f3892632eac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 9 Jul 2010 21:22:04 +0000
Subject: net/core: EXPORT_SYMBOL cleanups

CodingStyle cleanups

EXPORT_SYMBOL should immediately follow the symbol declaration.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c   |  8 +++-----
 net/core/flow.c       |  5 ++---
 net/core/gen_stats.c  | 14 ++++++--------
 net/core/iovec.c      |  9 ++++-----
 net/core/link_watch.c |  1 -
 net/core/net-sysfs.c  |  3 +--
 net/core/netevent.c   |  5 ++---
 net/core/netpoll.c    | 19 +++++++++----------
 net/core/rtnetlink.c  |  2 +-
 net/core/scm.c        |  9 ++++-----
 net/core/skbuff.c     |  1 -
 net/core/stream.c     |  6 ------
 net/core/utils.c      |  3 ---
 13 files changed, 32 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index f5b6f43a4c2e..251997a95483 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -219,6 +219,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 				   &peeked, err);
 }
+EXPORT_SYMBOL(skb_recv_datagram);
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
@@ -288,7 +289,6 @@ int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 
 	return err;
 }
-
 EXPORT_SYMBOL(skb_kill_datagram);
 
 /**
@@ -373,6 +373,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
 
 /**
  *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
@@ -716,6 +717,7 @@ csum_error:
 fault:
 	return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
 
 /**
  * 	datagram_poll - generic datagram poll
@@ -770,8 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 
 	return mask;
 }
-
 EXPORT_SYMBOL(datagram_poll);
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/flow.c b/net/core/flow.c
index 8c7c91a32f18..f67dcbfe54ef 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -62,6 +62,7 @@ struct flow_cache {
 };
 
 atomic_t flow_cache_genid = ATOMIC_INIT(0);
+EXPORT_SYMBOL(flow_cache_genid);
 static struct flow_cache flow_cache_global;
 static struct kmem_cache *flow_cachep;
 
@@ -291,6 +292,7 @@ ret_object:
 	local_bh_enable();
 	return flo;
 }
+EXPORT_SYMBOL(flow_cache_lookup);
 
 static void flow_cache_flush_tasklet(unsigned long data)
 {
@@ -424,6 +426,3 @@ static int __init flow_cache_init_global(void)
 }
 
 module_init(flow_cache_init_global);
-
-EXPORT_SYMBOL(flow_cache_genid);
-EXPORT_SYMBOL(flow_cache_lookup);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 393b1d8618e2..0452eb27a272 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -73,6 +73,7 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_start_copy_compat);
 
 /**
  * gnet_stats_start_copy_compat - start dumping procedure in compatibility mode
@@ -93,6 +94,7 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock,
 {
 	return gnet_stats_start_copy_compat(skb, type, 0, 0, lock, d);
 }
+EXPORT_SYMBOL(gnet_stats_start_copy);
 
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
@@ -123,6 +125,7 @@ gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b)
 	}
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_basic);
 
 /**
  * gnet_stats_copy_rate_est - copy rate estimator statistics into statistics TLV
@@ -154,6 +157,7 @@ gnet_stats_copy_rate_est(struct gnet_dump *d,
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_rate_est);
 
 /**
  * gnet_stats_copy_queue - copy queue statistics into statistics TLV
@@ -181,6 +185,7 @@ gnet_stats_copy_queue(struct gnet_dump *d, struct gnet_stats_queue *q)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_queue);
 
 /**
  * gnet_stats_copy_app - copy application specific statistics into statistics TLV
@@ -208,6 +213,7 @@ gnet_stats_copy_app(struct gnet_dump *d, void *st, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(gnet_stats_copy_app);
 
 /**
  * gnet_stats_finish_copy - finish dumping procedure
@@ -241,12 +247,4 @@ gnet_stats_finish_copy(struct gnet_dump *d)
 	spin_unlock_bh(d->lock);
 	return 0;
 }
-
-
-EXPORT_SYMBOL(gnet_stats_start_copy);
-EXPORT_SYMBOL(gnet_stats_start_copy_compat);
-EXPORT_SYMBOL(gnet_stats_copy_basic);
-EXPORT_SYMBOL(gnet_stats_copy_rate_est);
-EXPORT_SYMBOL(gnet_stats_copy_queue);
-EXPORT_SYMBOL(gnet_stats_copy_app);
 EXPORT_SYMBOL(gnet_stats_finish_copy);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index 1e7f4e91a935..1cd98df412df 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -95,6 +95,7 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_toiovec);
 
 /*
  *	Copy kernel to iovec. Returns -EFAULT on error.
@@ -120,6 +121,7 @@ int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_toiovecend);
 
 /*
  *	Copy iovec to kernel. Returns -EFAULT on error.
@@ -144,6 +146,7 @@ int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_fromiovec);
 
 /*
  *	Copy iovec from kernel. Returns -EFAULT on error.
@@ -172,6 +175,7 @@ int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
 
 	return 0;
 }
+EXPORT_SYMBOL(memcpy_fromiovecend);
 
 /*
  *	And now for the all-in-one: copy and checksum from a user iovec
@@ -256,9 +260,4 @@ out_fault:
 	err = -EFAULT;
 	goto out;
 }
-
 EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
-EXPORT_SYMBOL(memcpy_fromiovec);
-EXPORT_SYMBOL(memcpy_fromiovecend);
-EXPORT_SYMBOL(memcpy_toiovec);
-EXPORT_SYMBOL(memcpy_toiovecend);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index bdbce2f5875b..01a1101b5936 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -243,5 +243,4 @@ void linkwatch_fire_event(struct net_device *dev)
 
 	linkwatch_schedule_work(urgent);
 }
-
 EXPORT_SYMBOL(linkwatch_fire_event);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 914f42b0f039..d2b596537d41 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -924,13 +924,12 @@ int netdev_class_create_file(struct class_attribute *class_attr)
 {
 	return class_create_file(&net_class, class_attr);
 }
+EXPORT_SYMBOL(netdev_class_create_file);
 
 void netdev_class_remove_file(struct class_attribute *class_attr)
 {
 	class_remove_file(&net_class, class_attr);
 }
-
-EXPORT_SYMBOL(netdev_class_create_file);
 EXPORT_SYMBOL(netdev_class_remove_file);
 
 int netdev_kobject_init(void)
diff --git a/net/core/netevent.c b/net/core/netevent.c
index 95f81de87502..865f0ceb81fb 100644
--- a/net/core/netevent.c
+++ b/net/core/netevent.c
@@ -35,6 +35,7 @@ int register_netevent_notifier(struct notifier_block *nb)
 	err = atomic_notifier_chain_register(&netevent_notif_chain, nb);
 	return err;
 }
+EXPORT_SYMBOL_GPL(register_netevent_notifier);
 
 /**
  *	netevent_unregister_notifier - unregister a netevent notifier block
@@ -50,6 +51,7 @@ int unregister_netevent_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_unregister(&netevent_notif_chain, nb);
 }
+EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
 
 /**
  *	call_netevent_notifiers - call all netevent notifier blocks
@@ -64,7 +66,4 @@ int call_netevent_notifiers(unsigned long val, void *v)
 {
 	return atomic_notifier_call_chain(&netevent_notif_chain, val, v);
 }
-
-EXPORT_SYMBOL_GPL(register_netevent_notifier);
-EXPORT_SYMBOL_GPL(unregister_netevent_notifier);
 EXPORT_SYMBOL_GPL(call_netevent_notifiers);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index ca6dc31843ea..c2b7a8bed8f6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -197,11 +197,13 @@ void netpoll_poll_dev(struct net_device *dev)
 	service_arp_queue(dev->npinfo);
 
 }
+EXPORT_SYMBOL(netpoll_poll_dev);
 
 void netpoll_poll(struct netpoll *np)
 {
 	netpoll_poll_dev(np->dev);
 }
+EXPORT_SYMBOL(netpoll_poll);
 
 static void refill_skbs(void)
 {
@@ -313,6 +315,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
 }
+EXPORT_SYMBOL(netpoll_send_skb);
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 {
@@ -374,6 +377,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 
 	netpoll_send_skb(np, skb);
 }
+EXPORT_SYMBOL(netpoll_send_udp);
 
 static void arp_reply(struct sk_buff *skb)
 {
@@ -600,6 +604,7 @@ void netpoll_print_options(struct netpoll *np)
 	printk(KERN_INFO "%s: remote ethernet address %pM\n",
 	                 np->name, np->remote_mac);
 }
+EXPORT_SYMBOL(netpoll_print_options);
 
 int netpoll_parse_options(struct netpoll *np, char *opt)
 {
@@ -692,6 +697,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
 	       np->name, cur);
 	return -1;
 }
+EXPORT_SYMBOL(netpoll_parse_options);
 
 int __netpoll_setup(struct netpoll *np)
 {
@@ -848,6 +854,7 @@ put:
 	dev_put(ndev);
 	return err;
 }
+EXPORT_SYMBOL(netpoll_setup);
 
 static int __init netpoll_init(void)
 {
@@ -908,11 +915,13 @@ void netpoll_cleanup(struct netpoll *np)
 	dev_put(np->dev);
 	np->dev = NULL;
 }
+EXPORT_SYMBOL(netpoll_cleanup);
 
 int netpoll_trap(void)
 {
 	return atomic_read(&trapped);
 }
+EXPORT_SYMBOL(netpoll_trap);
 
 void netpoll_set_trap(int trap)
 {
@@ -921,14 +930,4 @@ void netpoll_set_trap(int trap)
 	else
 		atomic_dec(&trapped);
 }
-
-EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
-EXPORT_SYMBOL(netpoll_trap);
-EXPORT_SYMBOL(netpoll_print_options);
-EXPORT_SYMBOL(netpoll_parse_options);
-EXPORT_SYMBOL(netpoll_setup);
-EXPORT_SYMBOL(netpoll_cleanup);
-EXPORT_SYMBOL(netpoll_send_udp);
-EXPORT_SYMBOL(netpoll_poll_dev);
-EXPORT_SYMBOL(netpoll_poll);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5e773ea2201d..f78d821bd935 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -686,7 +686,7 @@ static size_t rtnl_port_size(const struct net_device *dev)
 		return port_self_size;
 }
 
-static inline size_t if_nlmsg_size(const struct net_device *dev)
+static noinline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
diff --git a/net/core/scm.c b/net/core/scm.c
index 681c976307b5..413cab89017d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -130,6 +130,7 @@ void __scm_destroy(struct scm_cookie *scm)
 		}
 	}
 }
+EXPORT_SYMBOL(__scm_destroy);
 
 int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)
 {
@@ -211,6 +212,7 @@ error:
 	scm_destroy(p);
 	return err;
 }
+EXPORT_SYMBOL(__scm_send);
 
 int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 {
@@ -249,6 +251,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 out:
 	return err;
 }
+EXPORT_SYMBOL(put_cmsg);
 
 void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 {
@@ -318,6 +321,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	 */
 	__scm_destroy(scm);
 }
+EXPORT_SYMBOL(scm_detach_fds);
 
 struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 {
@@ -335,9 +339,4 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 	}
 	return new_fpl;
 }
-
-EXPORT_SYMBOL(__scm_destroy);
-EXPORT_SYMBOL(__scm_send);
-EXPORT_SYMBOL(put_cmsg);
-EXPORT_SYMBOL(scm_detach_fds);
 EXPORT_SYMBOL(scm_fp_dup);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 34432b4e96bb..76d33ca5f037 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2483,7 +2483,6 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 	skb_postpull_rcsum(skb, skb->data, len);
 	return skb->data += len;
 }
-
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
 /**
diff --git a/net/core/stream.c b/net/core/stream.c
index cc196f42b8d8..d959e0f41528 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk)
 		rcu_read_unlock();
 	}
 }
-
 EXPORT_SYMBOL(sk_stream_write_space);
 
 /**
@@ -81,7 +80,6 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 	} while (!done);
 	return 0;
 }
-
 EXPORT_SYMBOL(sk_stream_wait_connect);
 
 /**
@@ -109,7 +107,6 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
 		finish_wait(sk_sleep(sk), &wait);
 	}
 }
-
 EXPORT_SYMBOL(sk_stream_wait_close);
 
 /**
@@ -174,7 +171,6 @@ do_interrupted:
 	err = sock_intr_errno(*timeo_p);
 	goto out;
 }
-
 EXPORT_SYMBOL(sk_stream_wait_memory);
 
 int sk_stream_error(struct sock *sk, int flags, int err)
@@ -185,7 +181,6 @@ int sk_stream_error(struct sock *sk, int flags, int err)
 		send_sig(SIGPIPE, current, 0);
 	return err;
 }
-
 EXPORT_SYMBOL(sk_stream_error);
 
 void sk_stream_kill_queues(struct sock *sk)
@@ -210,5 +205,4 @@ void sk_stream_kill_queues(struct sock *sk)
 	 * have gone away, only the net layer knows can touch it.
 	 */
 }
-
 EXPORT_SYMBOL(sk_stream_kill_queues);
diff --git a/net/core/utils.c b/net/core/utils.c
index 838250241d26..f41854470539 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -77,7 +77,6 @@ __be32 in_aton(const char *str)
 	}
 	return(htonl(l));
 }
-
 EXPORT_SYMBOL(in_aton);
 
 #define IN6PTON_XDIGIT		0x00010000
@@ -162,7 +161,6 @@ out:
 		*end = s;
 	return ret;
 }
-
 EXPORT_SYMBOL(in4_pton);
 
 int in6_pton(const char *src, int srclen,
@@ -280,7 +278,6 @@ out:
 		*end = s;
 	return ret;
 }
-
 EXPORT_SYMBOL(in6_pton);
 
 void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
-- 
cgit v1.2.3-58-ga151


From 70c2efa5a32a7d38e66224844032160317fa7887 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 9 Jul 2010 15:33:25 +0000
Subject: act_nat: not all of the ICMP packets need an IP header payload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

not all of the ICMP packets need an IP header payload, so we check the length
of the skbs only when the packets should have an IP header payload.

Based upon analysis and initial patch by Rodrigo Partearroyo González.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
----
 net/sched/act_nat.c |    5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_nat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 570949417f38..724553e8ed7b 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -205,7 +205,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 	{
 		struct icmphdr *icmph;
 
-		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+		if (!pskb_may_pull(skb, ihl + sizeof(*icmph)))
 			goto drop;
 
 		icmph = (void *)(skb_network_header(skb) + ihl);
@@ -215,6 +215,9 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
 		    (icmph->type != ICMP_PARAMETERPROB))
 			break;
 
+		if (!pskb_may_pull(skb, ihl + sizeof(*icmph) + sizeof(*iph)))
+			goto drop;
+
 		iph = (void *)(icmph + 1);
 		if (egress)
 			addr = iph->daddr;
-- 
cgit v1.2.3-58-ga151


From 336a283b9cbe47748ccd68fd8c5158f67cee644b Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 Jul 2010 20:03:42 -0700
Subject: dsa: Fix Kconfig dependencies.

Based upon a report by Randy Dunlap.

DSA needs PHYLIB, but PHYLIB needs NET_ETHERNET.  So, in order
to select PHYLIB we have to make DSA depend upon NET_ETHERNET.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c51b55400dc5..11201784d29a 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,7 +1,7 @@
 menuconfig NET_DSA
 	bool "Distributed Switch Architecture support"
 	default n
-	depends on EXPERIMENTAL && !S390
+	depends on EXPERIMENTAL && NET_ETHERNET && !S390
 	select PHYLIB
 	---help---
 	  This allows you to use hardware switch chips that use
-- 
cgit v1.2.3-58-ga151


From 5c4bfa17f3ec46becec4b23d12323f7605ebd696 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 9 Jul 2010 23:51:54 +0000
Subject: 9p: strlen() doesn't count the terminator

This is an off by one bug because strlen() doesn't count the NULL
terminator.  We strcpy() addr into a fixed length array of size
UNIX_PATH_MAX later on.

The addr variable is the name of the device being mounted.

CC: stable@kernel.org
Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_fd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 98ce9bcb0e15..c85109d809ca 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -948,7 +948,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
 
 	csocket = NULL;
 
-	if (strlen(addr) > UNIX_PATH_MAX) {
+	if (strlen(addr) >= UNIX_PATH_MAX) {
 		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n",
 			addr);
 		return -ENAMETOOLONG;
-- 
cgit v1.2.3-58-ga151


From 7ba42910073f8432934d61a6c08b1023c408fb62 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 10 Jul 2010 20:41:55 +0000
Subject: inet, inet6: make tcp_sendmsg() and tcp_sendpage() through
 inet_sendmsg() and inet_sendpage()

a new boolean flag no_autobind is added to structure proto to avoid the autobind
calls when the protocol is TCP. Then sock_rps_record_flow() is called int the
TCP's sendmsg() and sendpage() pathes.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/net/inet_common.h |    4 ++++
 include/net/sock.h        |    1 +
 include/net/tcp.h         |    8 ++++----
 net/ipv4/af_inet.c        |   15 +++++++++------
 net/ipv4/tcp.c            |   11 +++++------
 net/ipv4/tcp_ipv4.c       |    3 +++
 net/ipv6/af_inet6.c       |    8 ++++----
 net/ipv6/tcp_ipv6.c       |    3 +++
 8 files changed, 33 insertions(+), 20 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_common.h |  4 ++++
 include/net/sock.h        |  1 +
 include/net/tcp.h         |  8 ++++----
 net/ipv4/af_inet.c        | 15 +++++++++------
 net/ipv4/tcp.c            | 11 +++++------
 net/ipv4/tcp_ipv4.c       |  3 +++
 net/ipv6/af_inet6.c       |  8 ++++----
 net/ipv6/tcp_ipv6.c       |  3 +++
 8 files changed, 33 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 140c1ec9f8a2..22fac9892b16 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -21,6 +21,10 @@ extern int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
 extern int inet_accept(struct socket *sock, struct socket *newsock, int flags);
 extern int inet_sendmsg(struct kiocb *iocb, struct socket *sock,
 			struct msghdr *msg, size_t size);
+extern ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+			     size_t size, int flags);
+extern int inet_recvmsg(struct kiocb *iocb, struct socket *sock,
+			struct msghdr *msg, size_t size, int flags);
 extern int inet_shutdown(struct socket *sock, int how);
 extern int inet_listen(struct socket *sock, int backlog);
 extern void inet_sock_destruct(struct sock *sk);
diff --git a/include/net/sock.h b/include/net/sock.h
index 4f26f2f83be9..3100e71f0c3d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -772,6 +772,7 @@ struct proto {
 	int			*sysctl_wmem;
 	int			*sysctl_rmem;
 	int			max_header;
+	bool			no_autobind;
 
 	struct kmem_cache	*slab;
 	unsigned int		obj_size;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 33ce5242a17a..468b01f01c13 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -304,10 +304,10 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
 
 extern int tcp_v4_remember_stamp(struct sock *sk);
 extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
-extern int tcp_sendmsg(struct kiocb *iocb, struct socket *sock,
-		       struct msghdr *msg, size_t size);
-extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-			    size_t size, int flags);
+extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+		       size_t size);
+extern int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+			size_t size, int flags);
 extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				 struct tcphdr *th, unsigned len);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3ceb025b16f2..6a1100c25a9f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -727,28 +727,31 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
-			     size_t size, int flags)
+ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
+		      size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
 	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
-	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
+	if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind &&
+	    inet_autobind(sk))
 		return -EAGAIN;
 
 	if (sk->sk_prot->sendpage)
 		return sk->sk_prot->sendpage(sk, page, offset, size, flags);
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
+EXPORT_SYMBOL(inet_sendpage);
 
 int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		 size_t size, int flags)
@@ -894,10 +897,10 @@ const struct proto_ops inet_stream_ops = {
 	.shutdown	   = inet_shutdown,
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
-	.sendmsg	   = tcp_sendmsg,
+	.sendmsg	   = inet_sendmsg,
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8601b7683a6..9fce8a8a13aa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -857,15 +857,15 @@ out_err:
 	return sk_stream_error(sk, flags, err);
 }
 
-ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
-		     size_t size, int flags)
+int tcp_sendpage(struct sock *sk, struct page *page, int offset,
+		 size_t size, int flags)
 {
 	ssize_t res;
-	struct sock *sk = sock->sk;
 
 	if (!(sk->sk_route_caps & NETIF_F_SG) ||
 	    !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
-		return sock_no_sendpage(sock, page, offset, size, flags);
+		return sock_no_sendpage(sk->sk_socket, page, offset, size,
+					flags);
 
 	lock_sock(sk);
 	TCP_CHECK_TIMER(sk);
@@ -899,10 +899,9 @@ static inline int select_size(struct sock *sk, int sg)
 	return tmp;
 }
 
-int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		size_t size)
 {
-	struct sock *sk = sock->sk;
 	struct iovec *iov;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44545e8e8c92..020766292bb0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2600,6 +2600,8 @@ struct proto tcp_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v4_do_rcv,
 	.hash			= inet_hash,
 	.unhash			= inet_unhash,
@@ -2618,6 +2620,7 @@ struct proto tcp_prot = {
 	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e830cd4f9d0f..56b9bf2516f4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -522,10 +522,10 @@ const struct proto_ops inet6_stream_ops = {
 	.shutdown	   = inet_shutdown,		/* ok		*/
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
-	.sendmsg	   = tcp_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.sendmsg	   = inet_sendmsg,		/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
-	.sendpage	   = tcp_sendpage,
+	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
@@ -549,7 +549,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,	/* ok		*/
 	.getsockopt	   = sock_common_getsockopt,	/* ok		*/
 	.sendmsg	   = inet_sendmsg,		/* ok		*/
-	.recvmsg	   = sock_common_recvmsg,	/* ok		*/
+	.recvmsg	   = inet_recvmsg,		/* ok		*/
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5ebc27ecebdc..fe6d40418c0b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2142,6 +2142,8 @@ struct proto tcpv6_prot = {
 	.setsockopt		= tcp_setsockopt,
 	.getsockopt		= tcp_getsockopt,
 	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.sendpage		= tcp_sendpage,
 	.backlog_rcv		= tcp_v6_do_rcv,
 	.hash			= tcp_v6_hash,
 	.unhash			= inet_unhash,
@@ -2160,6 +2162,7 @@ struct proto tcpv6_prot = {
 	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
-- 
cgit v1.2.3-58-ga151


From d361fd599a991ff6c1d522a599c635b35d61ef30 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 10 Jul 2010 22:45:17 +0000
Subject: net: sock_free() optimizations

Avoid two extra instructions in sock_free(), to reload
skb->truesize and skb->sk

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index fef2434b7c8c..363bc260157c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1339,9 +1339,10 @@ EXPORT_SYMBOL(sock_wfree);
 void sock_rfree(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
+	unsigned int len = skb->truesize;
 
-	atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
-	sk_mem_uncharge(skb->sk, skb->truesize);
+	atomic_sub(len, &sk->sk_rmem_alloc);
+	sk_mem_uncharge(sk, len);
 }
 EXPORT_SYMBOL(sock_rfree);
 
-- 
cgit v1.2.3-58-ga151


From 15fd0cd9a2ad24a78fbee369dec8ca660979d57e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 11 Jul 2010 11:18:57 +0000
Subject: net: autoconvert trivial BKL users to private mutex

All these files use the big kernel lock in a trivial
way to serialize their private file operations,
typically resulting from an earlier semi-automatic
pushdown from VFS.

None of these drivers appears to want to lock against
other code, and they all use the BKL as the top-level
lock in their file operations, meaning that there
is no lock-order inversion problem.

Consequently, we can remove the BKL completely,
replacing it with a per-file mutex in every case.
Using a scripted approach means we can avoid
typos.

file=$1
name=$2
if grep -q lock_kernel ${file} ; then
    if grep -q 'include.*linux.mutex.h' ${file} ; then
            sed -i '/include.*<linux\/smp_lock.h>/d' ${file}
    else
            sed -i 's/include.*<linux\/smp_lock.h>.*$/include <linux\/mutex.h>/g' ${file}
    fi
    sed -i ${file} \
        -e "/^#include.*linux.mutex.h/,$ {
                1,/^\(static\|int\|long\)/ {
                     /^\(static\|int\|long\)/istatic DEFINE_MUTEX(${name}_mutex);

} }"  \
    -e "s/\(un\)*lock_kernel\>[ ]*()/mutex_\1lock(\&${name}_mutex)/g" \
    -e '/[      ]*cycle_kernel_lock();/d'
else
    sed -i -e '/include.*\<smp_lock.h\>/d' ${file}  \
                -e '/cycle_kernel_lock()/d'
fi

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_generic.c | 19 +++++++++----------
 drivers/net/wan/cosa.c    | 10 +++++-----
 net/wanrouter/wanmain.c   |  7 ++++---
 net/wanrouter/wanproc.c   |  7 ++++---
 4 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index e7b4187da057..6695a51e09e9 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -40,7 +40,6 @@
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
 #include <linux/stddef.h>
@@ -180,6 +179,7 @@ struct channel {
  * channel.downl.
  */
 
+static DEFINE_MUTEX(ppp_mutex);
 static atomic_t ppp_unit_count = ATOMIC_INIT(0);
 static atomic_t channel_count = ATOMIC_INIT(0);
 
@@ -362,7 +362,6 @@ static const int npindex_to_ethertype[NUM_NP] = {
  */
 static int ppp_open(struct inode *inode, struct file *file)
 {
-	cycle_kernel_lock();
 	/*
 	 * This could (should?) be enforced by the permissions on /dev/ppp.
 	 */
@@ -582,7 +581,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		 * this fd and reopening /dev/ppp.
 		 */
 		err = -EINVAL;
-		lock_kernel();
+		mutex_lock(&ppp_mutex);
 		if (pf->kind == INTERFACE) {
 			ppp = PF_TO_PPP(pf);
 			if (file == ppp->owner)
@@ -594,7 +593,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		} else
 			printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n",
 			       atomic_long_read(&file->f_count));
-		unlock_kernel();
+		mutex_unlock(&ppp_mutex);
 		return err;
 	}
 
@@ -602,7 +601,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		struct channel *pch;
 		struct ppp_channel *chan;
 
-		lock_kernel();
+		mutex_lock(&ppp_mutex);
 		pch = PF_TO_CHANNEL(pf);
 
 		switch (cmd) {
@@ -624,7 +623,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 				err = chan->ops->ioctl(chan, cmd, arg);
 			up_read(&pch->chan_sem);
 		}
-		unlock_kernel();
+		mutex_unlock(&ppp_mutex);
 		return err;
 	}
 
@@ -634,7 +633,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return -EINVAL;
 	}
 
-	lock_kernel();
+	mutex_lock(&ppp_mutex);
 	ppp = PF_TO_PPP(pf);
 	switch (cmd) {
 	case PPPIOCSMRU:
@@ -782,7 +781,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	default:
 		err = -ENOTTY;
 	}
-	unlock_kernel();
+	mutex_unlock(&ppp_mutex);
 	return err;
 }
 
@@ -795,7 +794,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 	struct ppp_net *pn;
 	int __user *p = (int __user *)arg;
 
-	lock_kernel();
+	mutex_lock(&ppp_mutex);
 	switch (cmd) {
 	case PPPIOCNEWUNIT:
 		/* Create a new ppp unit */
@@ -846,7 +845,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
 	default:
 		err = -ENOTTY;
 	}
-	unlock_kernel();
+	mutex_unlock(&ppp_mutex);
 	return err;
 }
 
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index f0bd70fb650c..04c6cd4333f1 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -89,7 +89,6 @@
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/device.h>
-#include <linux/smp_lock.h>
 #include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/byteorder.h>
@@ -174,6 +173,7 @@ struct cosa_data {
  * Character device major number. 117 was allocated for us.
  * The value of 0 means to allocate a first free one.
  */
+static DEFINE_MUTEX(cosa_chardev_mutex);
 static int cosa_major = 117;
 
 /*
@@ -944,7 +944,7 @@ static int cosa_open(struct inode *inode, struct file *file)
 	int n;
 	int ret = 0;
 
-	lock_kernel();
+	mutex_lock(&cosa_chardev_mutex);
 	if ((n=iminor(file->f_path.dentry->d_inode)>>CARD_MINOR_BITS)
 		>= nr_cards) {
 		ret = -ENODEV;
@@ -976,7 +976,7 @@ static int cosa_open(struct inode *inode, struct file *file)
 	chan->rx_done = chrdev_rx_done;
 	spin_unlock_irqrestore(&cosa->lock, flags);
 out:
-	unlock_kernel();
+	mutex_unlock(&cosa_chardev_mutex);
 	return ret;
 }
 
@@ -1212,10 +1212,10 @@ static long cosa_chardev_ioctl(struct file *file, unsigned int cmd,
 	struct cosa_data *cosa;
 	long ret;
 
-	lock_kernel();
+	mutex_lock(&cosa_chardev_mutex);
 	cosa = channel->cosa;
 	ret = cosa_ioctl_common(cosa, channel, cmd, arg);
-	unlock_kernel();
+	mutex_unlock(&cosa_chardev_mutex);
 	return ret;
 }
 
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 258daa80ad92..2bf23406637a 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -48,7 +48,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>	/* support for loadable modules */
 #include <linux/slab.h>		/* kmalloc(), kfree() */
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
 #include <linux/mm.h>
 #include <linux/string.h>	/* inline mem*, str* functions */
 
@@ -71,6 +71,7 @@
  *	WAN device IOCTL handlers
  */
 
+static DEFINE_MUTEX(wanrouter_mutex);
 static int wanrouter_device_setup(struct wan_device *wandev,
 				  wandev_conf_t __user *u_conf);
 static int wanrouter_device_stat(struct wan_device *wandev,
@@ -376,7 +377,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (wandev->magic != ROUTER_MAGIC)
 		return -EINVAL;
 
-	lock_kernel();
+	mutex_lock(&wanrouter_mutex);
 	switch (cmd) {
 	case ROUTER_SETUP:
 		err = wanrouter_device_setup(wandev, data);
@@ -408,7 +409,7 @@ long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 			err = wandev->ioctl(wandev, cmd, arg);
 		else err = -EINVAL;
 	}
-	unlock_kernel();
+	mutex_unlock(&wanrouter_mutex);
 	return err;
 }
 
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index c44d96b3a437..11f25c7a7a05 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -27,7 +27,7 @@
 #include <linux/module.h>
 #include <linux/wanrouter.h>	/* WAN router API definitions */
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
+#include <linux/mutex.h>
 
 #include <net/net_namespace.h>
 #include <asm/io.h>
@@ -66,6 +66,7 @@
  *	/proc/net/router
  */
 
+static DEFINE_MUTEX(config_mutex);
 static struct proc_dir_entry *proc_router;
 
 /* Strings */
@@ -85,7 +86,7 @@ static void *r_start(struct seq_file *m, loff_t *pos)
 	struct wan_device *wandev;
 	loff_t l = *pos;
 
-	lock_kernel();
+	mutex_lock(&config_mutex);
 	if (!l--)
 		return SEQ_START_TOKEN;
 	for (wandev = wanrouter_router_devlist; l-- && wandev;
@@ -104,7 +105,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
 static void r_stop(struct seq_file *m, void *v)
 	__releases(kernel_lock)
 {
-	unlock_kernel();
+	mutex_unlock(&config_mutex);
 }
 
 static int config_show(struct seq_file *m, void *v)
-- 
cgit v1.2.3-58-ga151


From 8a994a7180104b305970232e160b18523c1fbd6a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 12 Jul 2010 10:50:23 +0000
Subject: net/core: Remove unnecessary casts of private_data

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1ee2ebd9b04f..24a19debda1b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -846,7 +846,7 @@ static ssize_t pktgen_if_write(struct file *file,
 			       const char __user * user_buffer, size_t count,
 			       loff_t * offset)
 {
-	struct seq_file *seq = (struct seq_file *)file->private_data;
+	struct seq_file *seq = file->private_data;
 	struct pktgen_dev *pkt_dev = seq->private;
 	int i = 0, max, len;
 	char name[16], valstr[32];
@@ -1776,7 +1776,7 @@ static ssize_t pktgen_thread_write(struct file *file,
 				   const char __user * user_buffer,
 				   size_t count, loff_t * offset)
 {
-	struct seq_file *seq = (struct seq_file *)file->private_data;
+	struct seq_file *seq = file->private_data;
 	struct pktgen_thread *t = seq->private;
 	int i = 0, max, len, ret;
 	char name[40];
-- 
cgit v1.2.3-58-ga151


From 55a40e243210b72169eaa1cbd9b6edc6097571f8 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 12 Jul 2010 10:50:24 +0000
Subject: net/irda: Remove unnecessary casts of private_data

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/irda/irnet/irnet_ppp.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 6a1a202710c5..800bc53b7f63 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -527,7 +527,7 @@ static int
 dev_irnet_close(struct inode *	inode,
 		struct file *	file)
 {
-  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  irnet_socket *	ap = file->private_data;
 
   DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
 	 file, ap);
@@ -564,7 +564,7 @@ dev_irnet_write(struct file *	file,
 		size_t		count,
 		loff_t *	ppos)
 {
-  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  irnet_socket *	ap = file->private_data;
 
   DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
 	file, ap, count);
@@ -588,7 +588,7 @@ dev_irnet_read(struct file *	file,
 	       size_t		count,
 	       loff_t *		ppos)
 {
-  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  irnet_socket *	ap = file->private_data;
 
   DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%Zd)\n",
 	file, ap, count);
@@ -609,7 +609,7 @@ static unsigned int
 dev_irnet_poll(struct file *	file,
 	       poll_table *	wait)
 {
-  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  irnet_socket *	ap = file->private_data;
   unsigned int		mask;
 
   DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n",
@@ -638,7 +638,7 @@ dev_irnet_ioctl(
 		unsigned int	cmd,
 		unsigned long	arg)
 {
-  irnet_socket *	ap = (struct irnet_socket *) file->private_data;
+  irnet_socket *	ap = file->private_data;
   int			err;
   int			val;
   void __user *argp = (void __user *)arg;
-- 
cgit v1.2.3-58-ga151


From d809ec895505e6f35fb1965f0946381ab4eaa474 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Mon, 12 Jul 2010 21:29:42 +0000
Subject: xfrm: do not assume that template resolving always returns xfrms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

xfrm_resolve_and_create_bundle() assumed that, if policies indicated
presence of xfrms, bundle template resolution would always return
some xfrms. This is not true for 'use' level policies which can
result in no xfrm's being applied if there is no suitable xfrm states.
This fixes a crash by this incorrect assumption.

Reported-by: George Spelvin <linux@horizon.com>
Bisected-by: George Spelvin <linux@horizon.com>
Tested-by: George Spelvin <linux@horizon.com>
Signed-off-by: Timo Teräs <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index af1c173be4ad..a7ec5a8a2380 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1594,8 +1594,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
 
 	/* Try to instantiate a bundle */
 	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
-	if (err < 0) {
-		if (err != -EAGAIN)
+	if (err <= 0) {
+		if (err != 0 && err != -EAGAIN)
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 		return ERR_PTR(err);
 	}
@@ -1678,6 +1678,13 @@ xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
 			goto make_dummy_bundle;
 		dst_hold(&xdst->u.dst);
 		return oldflo;
+	} else if (new_xdst == NULL) {
+		num_xfrms = 0;
+		if (oldflo == NULL)
+			goto make_dummy_bundle;
+		xdst->num_xfrms = 0;
+		dst_hold(&xdst->u.dst);
+		return oldflo;
 	}
 
 	/* Kill the previous bundle */
@@ -1760,6 +1767,10 @@ restart:
 				xfrm_pols_put(pols, num_pols);
 				err = PTR_ERR(xdst);
 				goto dropdst;
+			} else if (xdst == NULL) {
+				num_xfrms = 0;
+				drop_pols = num_pols;
+				goto no_transform;
 			}
 
 			spin_lock_bh(&xfrm_policy_sk_bundle_lock);
-- 
cgit v1.2.3-58-ga151


From 3a047bf87b1b6f69c62ab9fb28072c639cb7e2fa Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 12 Jul 2010 21:00:12 +0000
Subject: rfs: call sock_rps_record_flow() in tcp_splice_read()

rfs: call sock_rps_record_flow() in tcp_splice_read()

call sock_rps_record_flow() in tcp_splice_read(), so the applications using
splice(2) or sendfile(2) can utilize RFS.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/ipv4/tcp.c |    1 +
 1 file changed, 1 insertion(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6596b4feeddc..65afeaec15b7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -608,6 +608,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 	ssize_t spliced;
 	int ret;
 
+	sock_rps_record_flow(sk);
 	/*
 	 * We can't seek on a socket input
 	 */
-- 
cgit v1.2.3-58-ga151


From 87fd308cfc6b2e880bf717a740bd5c58d2aed10c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 13 Jul 2010 05:24:20 +0000
Subject: net: skb_tx_hash() fix relative to skb_orphan_try()

commit fc6055a5ba31e2 (net: Introduce skb_orphan_try()) added early
orphaning of skbs.

This unfortunately added a performance regression in skb_tx_hash() in
case of stacked devices (bonding, vlans, ...)

Since skb->sk is now NULL, we cannot access sk->sk_hash anymore to
spread tx packets to multiple NIC queues on multiqueue devices.

skb_tx_hash() in this case only uses skb->protocol, same value for all
flows.

skb_orphan_try() can copy sk->sk_hash into skb->rxhash and skb_tx_hash()
can use this saved sk_hash value to compute its internal hash value.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 723a34710ad4..4b05fdf762ab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1911,8 +1911,16 @@ static int dev_gso_segment(struct sk_buff *skb)
  */
 static inline void skb_orphan_try(struct sk_buff *skb)
 {
-	if (!skb_tx(skb)->flags)
+	struct sock *sk = skb->sk;
+
+	if (sk && !skb_tx(skb)->flags) {
+		/* skb_tx_hash() wont be able to get sk.
+		 * We copy sk_hash into skb->rxhash
+		 */
+		if (!skb->rxhash)
+			skb->rxhash = sk->sk_hash;
 		skb_orphan(skb);
+	}
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1998,8 +2006,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = (__force u16) skb->protocol;
-
+		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
-- 
cgit v1.2.3-58-ga151


From 0eff683f737bf684dc9299e2eaca79cceb80a8c1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 14 Jul 2010 17:56:37 -0700
Subject: net/sched: potential data corruption

The reset_policy() does:
        memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
        strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);

In the original code, the size of d->tcfd_defdata wasn't fixed and if
strlen(defdata) was less than 31, reset_policy() would cause memory
corruption.

Please Note:  The original alloc_defdata() assumes defdata is 32
characters and a NUL terminator while reset_policy() assumes defdata is
31 characters and a NUL.  This patch updates alloc_defdata() to match
reset_policy() (ie a shorter string).  I'm not very familiar with this
code so please review carefully.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_simple.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 1b4bc691d7d1..4a1d640b0cf1 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -73,10 +73,10 @@ static int tcf_simp_release(struct tcf_defact *d, int bind)
 
 static int alloc_defdata(struct tcf_defact *d, char *defdata)
 {
-	d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL);
+	d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL);
 	if (unlikely(!d->tcfd_defdata))
 		return -ENOMEM;
-
+	strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From 91a72a70594e5212c97705ca6a694bd307f7a26b Mon Sep 17 00:00:00 2001
From: Doug Kehn <rdkehn@yahoo.com>
Date: Wed, 14 Jul 2010 18:02:16 -0700
Subject: net/core: neighbour update Oops

When configuring DMVPN (GRE + openNHRP) and a GRE remote
address is configured a kernel Oops is observed.  The
obserseved Oops is caused by a NULL header_ops pointer
(neigh->dev->header_ops) in neigh_update_hhs() when

void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
= neigh->dev->header_ops->cache_update;

is executed.  The dev associated with the NULL header_ops is
the GRE interface.  This patch guards against the
possibility that header_ops is NULL.

This Oops was first observed in kernel version 2.6.26.8.

Signed-off-by: Doug Kehn <rdkehn@yahoo.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6ba1c0eece03..a4e0a7482c2b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -949,7 +949,10 @@ static void neigh_update_hhs(struct neighbour *neigh)
 {
 	struct hh_cache *hh;
 	void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
-		= neigh->dev->header_ops->cache_update;
+		= NULL;
+
+	if (neigh->dev->header_ops)
+		update = neigh->dev->header_ops->cache_update;
 
 	if (update) {
 		for (hh = neigh->hh; hh; hh = hh->hh_next) {
-- 
cgit v1.2.3-58-ga151


From 54874868585ffa5d73d7ab8a5a32ea7dcdec1441 Mon Sep 17 00:00:00 2001
From: Chihau Chau <chihau@gmail.com>
Date: Wed, 14 Jul 2010 18:27:09 -0700
Subject: Net: ethernet: pe2.c: fix EXPORT_SYMBOL macro code style issue

This patch fix a code style issue, if a function is exported, the
EXPORT_SYMBOL macro for it should follow immediately after the closing
function brace line.

Signed-off-by: Chihau Chau <chihau@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/pe2.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ethernet/pe2.c b/net/ethernet/pe2.c
index eb00796758c3..85d574addbc1 100644
--- a/net/ethernet/pe2.c
+++ b/net/ethernet/pe2.c
@@ -28,11 +28,10 @@ struct datalink_proto *make_EII_client(void)
 
 	return proto;
 }
+EXPORT_SYMBOL(make_EII_client);
 
 void destroy_EII_client(struct datalink_proto *dl)
 {
 	kfree(dl);
 }
-
 EXPORT_SYMBOL(destroy_EII_client);
-EXPORT_SYMBOL(make_EII_client);
-- 
cgit v1.2.3-58-ga151


From b0f77d0eae0c58a5a9691a067ada112ceeae2d00 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 14 Jul 2010 20:50:29 -0700
Subject: net: fix problem in reading sock TX queue

Fix problem in reading the tx_queue recorded in a socket.  In
dev_pick_tx, the TX queue is read by doing a check with
sk_tx_queue_recorded on the socket, followed by a sk_tx_queue_get.
The problem is that there is not mutual exclusion across these
calls in the socket so it it is possible that the queue in the
sock can be invalidated after sk_tx_queue_recorded is called so
that sk_tx_queue get returns -1, which sets 65535 in queue_index
and thus dev_pick_tx returns 65536 which is a bogus queue and
can cause crash in dev_queue_xmit.

We fix this by only calling sk_tx_queue_get which does the proper
checks.  The interface is that sk_tx_queue_get returns the TX queue
if the sock argument is non-NULL and TX queue is recorded, else it
returns -1.  sk_tx_queue_recorded is no longer used so it can be
completely removed.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 7 +------
 net/core/dev.c     | 7 +++----
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 731150d52799..0a691ea7654a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1224,12 +1224,7 @@ static inline void sk_tx_queue_clear(struct sock *sk)
 
 static inline int sk_tx_queue_get(const struct sock *sk)
 {
-	return sk->sk_tx_queue_mapping;
-}
-
-static inline bool sk_tx_queue_recorded(const struct sock *sk)
-{
-	return (sk && sk->sk_tx_queue_mapping >= 0);
+	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
diff --git a/net/core/dev.c b/net/core/dev.c
index 4b05fdf762ab..0ea10f849be8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2029,12 +2029,11 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 					struct sk_buff *skb)
 {
-	u16 queue_index;
+	int queue_index;
 	struct sock *sk = skb->sk;
 
-	if (sk_tx_queue_recorded(sk)) {
-		queue_index = sk_tx_queue_get(sk);
-	} else {
+	queue_index = sk_tx_queue_get(sk);
+	if (queue_index < 0) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 
 		if (ops->ndo_select_queue) {
-- 
cgit v1.2.3-58-ga151


From bb7a0bd600ac2e09a8747ef89e692a2967ed8c97 Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Thu, 15 Jul 2010 08:47:33 +0000
Subject: net: bridge: fix sign bug

ipv6_skip_exthdr() can return error code that is below zero.
'offset' is unsigned, so it makes no sense.
ipv6_skip_exthdr() returns 'int' so we can painlessly change type of
offset to int.

Signed-off-by: Kulikov Vasiliy <segooon@gmail.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 27ae946363f1..85afcdab4921 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1435,7 +1435,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	struct icmp6hdr *icmp6h;
 	u8 nexthdr;
 	unsigned len;
-	unsigned offset;
+	int offset;
 	int err;
 
 	if (!pskb_may_pull(skb, sizeof(*ip6h)))
-- 
cgit v1.2.3-58-ga151


From e40dbc51fbcc3281bb52ecf0f5bec693d36e2aea Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Thu, 15 Jul 2010 13:22:33 +0000
Subject: ipmr: Don't leak memory if fib lookup fails.

This was detected using two mcast router tables.  The
pimreg for the second interface did not have a specific
mrule, so packets received by it were handled by the
default table, which had nothing configured.

This caused the ipmr_fib_lookup to fail, causing
the memory leak.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 757f25eb9b4b..7f6273506eea 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -442,8 +442,10 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 	int err;
 
 	err = ipmr_fib_lookup(net, &fl, &mrt);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
@@ -1728,8 +1730,10 @@ int ip_mr_input(struct sk_buff *skb)
 		goto dont_forward;
 
 	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
-	if (err < 0)
+	if (err < 0) {
+		kfree_skb(skb);
 		return err;
+	}
 
 	if (!local) {
 		    if (IPCB(skb)->opt.router_alert) {
-- 
cgit v1.2.3-58-ga151


From d9a9dc66eb8a8fd85c8546247e7e1d6023d0eb0f Mon Sep 17 00:00:00 2001
From: Arnaud Ebalard <arno@natisbad.org>
Date: Fri, 16 Jul 2010 00:38:44 +0000
Subject: IPv6: fix CoA check in RH2 input handler (mip6_rthdr_input())

The input handler for Type 2 Routing Header (mip6_rthdr_input())
checks if the CoA in the packet matches the CoA in the XFRM state.

Current check is buggy: it compares the adddress in the Type 2
Routing Header, i.e. the HoA, against the expected CoA in the state.
The comparison should be made against the address in the destination
field of the IPv6 header.

The bug remained unnoticed because the main (and possibly only current)
user of the code (UMIP MIPv6 Daemon) initializes the XFRM state with the
unspecified address, i.e. explicitly allows everything.

Yoshifuji-san, can you ack that one?

Signed-off-by: Arnaud Ebalard <arno@natisbad.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/mip6.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 2794b6002836..d6e9599d0705 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -347,11 +347,12 @@ static const struct xfrm_type mip6_destopt_type =
 
 static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb)
 {
+	struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data;
 	int err = rt2->rt_hdr.nexthdr;
 
 	spin_lock(&x->lock);
-	if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) &&
+	if (!ipv6_addr_equal(&iph->daddr, (struct in6_addr *)x->coaddr) &&
 	    !ipv6_addr_any((struct in6_addr *)x->coaddr))
 		err = -ENOENT;
 	spin_unlock(&x->lock);
-- 
cgit v1.2.3-58-ga151


From 8e64159dfb480b30233d947d5a3cd793dfea738f Mon Sep 17 00:00:00 2001
From: Kulikov Vasiliy <segooon@gmail.com>
Date: Sat, 17 Jul 2010 05:21:00 +0000
Subject: net: dccp: fix sign bug

'gap' is unsigned, so this code is wrong:

    gap = -new_head;
    ...
    if (gap > 0) { ... }

Make 'gap' signed.

The semantic patch that finds this problem (many false-positive results):
(http://coccinelle.lip6.fr/)

// <smpl>
@ r1 @
identifier f;
@@
int f(...) { ... }

@@
identifier r1.f;
type T;
unsigned T x;
@@

*x = f(...)
 ...
*x > 0

Signed-off-by: Kulikov Vasiliy <segooon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ackvec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 2abddee48304..92a6fcb40d7d 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -201,7 +201,7 @@ static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
 						 const unsigned int packets,
 						 const unsigned char state)
 {
-	unsigned int gap;
+	long gap;
 	long new_head;
 
 	if (av->av_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
-- 
cgit v1.2.3-58-ga151


From 01893c82b4e6949f4e3a453c4faea34970359d76 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sun, 18 Jul 2010 15:29:14 -0700
Subject: net: Remove MAX_SOCK_ADDR constant

MAX_SOCK_ADDR is no longer used because commit 230b1839 "net: Use standard
structures for generic socket address structures." replaced
"char address[MAX_SOCK_ADDR];" with "struct sockaddr_storage address;".

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index acfa1738663d..6fe484122a44 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -170,15 +170,6 @@ static DEFINE_PER_CPU(int, sockets_in_use);
  * divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR	128		/* 108 for Unix domain -
-					   16 for IP, 16 for IPX,
-					   24 for IPv6,
-					   about 80 for AX.25
-					   must be at least one bigger than
-					   the AF_UNIX size (see net/unix/af_unix.c
-					   :unix_mkname()).
-					 */
-
 /**
  *	move_addr_to_kernel	-	copy a socket address into kernel space
  *	@uaddr: Address in user space
-- 
cgit v1.2.3-58-ga151


From ad1afb00393915a51c21b1ae8704562bf036855f Mon Sep 17 00:00:00 2001
From: Pedro Garcia <pedro.netdev@dondevamos.com>
Date: Sun, 18 Jul 2010 15:38:44 -0700
Subject: vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)

- Without the 8021q module loaded in the kernel, all 802.1p packets
(VLAN 0 but QoS tagging) are silently discarded (as expected, as
the protocol is not loaded).

- Without this patch in 8021q module, these packets are forwarded to
the module, but they are discarded also if VLAN 0 is not configured,
which should not be the default behaviour, as VLAN 0 is not really
a VLANed packet but a 802.1p packet. Defining VLAN 0 makes it almost
impossible to communicate with mixed 802.1p and non 802.1p devices on
the same network due to arp table issues.

- Changed logic to skip vlan specific code in vlan_skb_recv if VLAN
is 0 and we have not defined a VLAN with ID 0, but we accept the
packet with the encapsulated proto and pass it later to netif_rx.

- In the vlan device event handler, added some logic to add VLAN 0
to HW filter in devices that support it (this prevented any traffic
in VLAN 0 to reach the stack in e1000e with HW filter under 2.6.35,
and probably also with other HW filtered cards, so we fix it here).

- In the vlan unregister logic, prevent the elimination of VLAN 0
in devices with HW filter.

- The default behaviour is to ignore the VLAN 0 tagging and accept
the packet as if it was not tagged, but we can still define a
VLAN 0 if desired (so it is backwards compatible).

Signed-off-by: Pedro Garcia <pedro.netdev@dondevamos.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c      |  13 ++++++-
 net/8021q/vlan_core.c |  19 ++++++++--
 net/8021q/vlan_dev.c  | 103 +++++++++++++++++++++++++++++---------------------
 3 files changed, 86 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 3c1c8c14e929..a2ad15250575 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -155,9 +155,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	BUG_ON(!grp);
 
 	/* Take it out of our own structures, but be sure to interlock with
-	 * HW accelerating devices or SW vlan input packet processing.
+	 * HW accelerating devices or SW vlan input packet processing if
+	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
 		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
 
 	grp->nr_vlans--;
@@ -419,6 +420,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
 
+	if ((event == NETDEV_UP) &&
+	    (dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	    dev->netdev_ops->ndo_vlan_rx_add_vid) {
+		pr_info("8021q: adding VLAN 0 to HW filter on device %s\n",
+			dev->name);
+		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+	}
+
 	grp = __vlan_find_group(dev);
 	if (!grp)
 		goto out;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 1b9406a31f0c..01ddb0472f86 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,6 +8,9 @@
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		      u16 vlan_tci, int polling)
 {
+	struct net_device *vlan_dev;
+	u16 vlan_id;
+
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
@@ -16,9 +19,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 
 	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+	vlan_id = vlan_tci & VLAN_VID_MASK;
+	vlan_dev = vlan_group_get_device(grp, vlan_id);
 
-	if (!skb->dev)
+	if (vlan_dev)
+		skb->dev = vlan_dev;
+	else if (vlan_id)
 		goto drop;
 
 	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
@@ -83,15 +89,20 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 		unsigned int vlan_tci, struct sk_buff *skb)
 {
 	struct sk_buff *p;
+	struct net_device *vlan_dev;
+	u16 vlan_id;
 
 	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
 		skb->deliver_no_wcard = 1;
 
 	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK);
+	vlan_id = vlan_tci & VLAN_VID_MASK;
+	vlan_dev = vlan_group_get_device(grp, vlan_id);
 
-	if (!skb->dev)
+	if (vlan_dev)
+		skb->dev = vlan_dev;
+	else if (vlan_id)
 		goto drop;
 
 	for (p = napi->gro_list; p; p = p->next) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 7cb285f96b99..3d59c9bf8feb 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -142,6 +142,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 {
 	struct vlan_hdr *vhdr;
 	struct vlan_rx_stats *rx_stats;
+	struct net_device *vlan_dev;
 	u16 vlan_id;
 	u16 vlan_tci;
 
@@ -157,55 +158,71 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	vlan_id = vlan_tci & VLAN_VID_MASK;
 
 	rcu_read_lock();
-	skb->dev = __find_vlan_dev(dev, vlan_id);
-	if (!skb->dev) {
-		pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
-			 __func__, vlan_id, dev->name);
-		goto err_unlock;
-	}
-
-	rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
-			       smp_processor_id());
-	u64_stats_update_begin(&rx_stats->syncp);
-	rx_stats->rx_packets++;
-	rx_stats->rx_bytes += skb->len;
-
-	skb_pull_rcsum(skb, VLAN_HLEN);
-
-	skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
+	vlan_dev = __find_vlan_dev(dev, vlan_id);
 
-	pr_debug("%s: priority: %u for TCI: %hu\n",
-		 __func__, skb->priority, vlan_tci);
-
-	switch (skb->pkt_type) {
-	case PACKET_BROADCAST: /* Yeah, stats collect these together.. */
-		/* stats->broadcast ++; // no such counter :-( */
-		break;
-
-	case PACKET_MULTICAST:
-		rx_stats->rx_multicast++;
-		break;
+	/* If the VLAN device is defined, we use it.
+	 * If not, and the VID is 0, it is a 802.1p packet (not
+	 * really a VLAN), so we will just netif_rx it later to the
+	 * original interface, but with the skb->proto set to the
+	 * wrapped proto: we do nothing here.
+	 */
 
-	case PACKET_OTHERHOST:
-		/* Our lower layer thinks this is not local, let's make sure.
-		 * This allows the VLAN to have a different MAC than the
-		 * underlying device, and still route correctly.
-		 */
-		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					skb->dev->dev_addr))
-			skb->pkt_type = PACKET_HOST;
-		break;
-	default:
-		break;
+	if (!vlan_dev) {
+		if (vlan_id) {
+			pr_debug("%s: ERROR: No net_device for VID: %u on dev: %s\n",
+				 __func__, vlan_id, dev->name);
+			goto err_unlock;
+		}
+		rx_stats = NULL;
+	} else {
+		skb->dev = vlan_dev;
+
+		rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
+					smp_processor_id());
+		u64_stats_update_begin(&rx_stats->syncp);
+		rx_stats->rx_packets++;
+		rx_stats->rx_bytes += skb->len;
+
+		skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci);
+
+		pr_debug("%s: priority: %u for TCI: %hu\n",
+			 __func__, skb->priority, vlan_tci);
+
+		switch (skb->pkt_type) {
+		case PACKET_BROADCAST:
+			/* Yeah, stats collect these together.. */
+			/* stats->broadcast ++; // no such counter :-( */
+			break;
+
+		case PACKET_MULTICAST:
+			rx_stats->rx_multicast++;
+			break;
+
+		case PACKET_OTHERHOST:
+			/* Our lower layer thinks this is not local, let's make
+			 * sure.
+			 * This allows the VLAN to have a different MAC than the
+			 * underlying device, and still route correctly.
+			 */
+			if (!compare_ether_addr(eth_hdr(skb)->h_dest,
+						skb->dev->dev_addr))
+				skb->pkt_type = PACKET_HOST;
+			break;
+		default:
+			break;
+		}
+		u64_stats_update_end(&rx_stats->syncp);
 	}
-	u64_stats_update_end(&rx_stats->syncp);
 
+	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
 
-	skb = vlan_check_reorder_header(skb);
-	if (!skb) {
-		rx_stats->rx_errors++;
-		goto err_unlock;
+	if (vlan_dev) {
+		skb = vlan_check_reorder_header(skb);
+		if (!skb) {
+			rx_stats->rx_errors++;
+			goto err_unlock;
+		}
 	}
 
 	netif_rx(skb);
-- 
cgit v1.2.3-58-ga151


From 28b041139e344ecd0f144d6205b004ae354cfa1e Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 08:48:55 +0000
Subject: net: preserve ifreq parameter when calling generic phy_mii_ioctl().

The phy_mii_ioctl() function unnecessarily throws away the original ifreq.
We need access to the ifreq in order to support PHYs that can perform
hardware time stamping.

Two maverick drivers filter the ioctl commands passed to phy_mii_ioctl().
This is unnecessary since phylib will check the command in any case.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/arm/ixp4xx_eth.c           |  3 ++-
 drivers/net/au1000_eth.c               |  2 +-
 drivers/net/bcm63xx_enet.c             |  2 +-
 drivers/net/cpmac.c                    |  5 +----
 drivers/net/dnet.c                     |  2 +-
 drivers/net/ethoc.c                    |  2 +-
 drivers/net/fec.c                      |  2 +-
 drivers/net/fec_mpc52xx.c              |  2 +-
 drivers/net/fs_enet/fs_enet-main.c     |  3 +--
 drivers/net/gianfar.c                  |  2 +-
 drivers/net/macb.c                     |  2 +-
 drivers/net/mv643xx_eth.c              |  2 +-
 drivers/net/octeon/octeon_mgmt.c       |  2 +-
 drivers/net/phy/phy.c                  |  3 ++-
 drivers/net/sb1250-mac.c               |  2 +-
 drivers/net/sh_eth.c                   |  2 +-
 drivers/net/smsc911x.c                 |  2 +-
 drivers/net/smsc9420.c                 |  2 +-
 drivers/net/stmmac/stmmac_main.c       | 22 ++++++++--------------
 drivers/net/tc35815.c                  |  2 +-
 drivers/net/tg3.c                      |  2 +-
 drivers/net/ucc_geth.c                 |  2 +-
 drivers/staging/octeon/ethernet-mdio.c |  2 +-
 include/linux/phy.h                    |  2 +-
 net/dsa/slave.c                        |  3 +--
 25 files changed, 34 insertions(+), 43 deletions(-)

(limited to 'net')

diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index ee2f8425dbe7..4f1cc7164ad9 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -782,7 +782,8 @@ static int eth_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 
 	if (!netif_running(dev))
 		return -EINVAL;
-	return phy_mii_ioctl(port->phydev, if_mii(req), cmd);
+
+	return phy_mii_ioctl(port->phydev, req, cmd);
 }
 
 /* ethtool support */
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index ece6128bef14..386d4feec652 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -978,7 +978,7 @@ static int au1000_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!aup->phy_dev)
 		return -EINVAL; /* PHY not controllable */
 
-	return phy_mii_ioctl(aup->phy_dev, if_mii(rq), cmd);
+	return phy_mii_ioctl(aup->phy_dev, rq, cmd);
 }
 
 static const struct net_device_ops au1000_netdev_ops = {
diff --git a/drivers/net/bcm63xx_enet.c b/drivers/net/bcm63xx_enet.c
index faf5add894d7..0d2c5da08937 100644
--- a/drivers/net/bcm63xx_enet.c
+++ b/drivers/net/bcm63xx_enet.c
@@ -1496,7 +1496,7 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (priv->has_phy) {
 		if (!priv->phydev)
 			return -ENODEV;
-		return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+		return phy_mii_ioctl(priv->phydev, rq, cmd);
 	} else {
 		struct mii_if_info mii;
 
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 38de1a4f825f..e1f6156b3710 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -846,11 +846,8 @@ static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		return -EINVAL;
 	if (!priv->phy)
 		return -EINVAL;
-	if ((cmd == SIOCGMIIPHY) || (cmd == SIOCGMIIREG) ||
-	    (cmd == SIOCSMIIREG))
-		return phy_mii_ioctl(priv->phy, if_mii(ifr), cmd);
 
-	return -EOPNOTSUPP;
+	return phy_mii_ioctl(priv->phy, ifr, cmd);
 }
 
 static int cpmac_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c
index 8b0f50bbf3e5..4ea7141f525d 100644
--- a/drivers/net/dnet.c
+++ b/drivers/net/dnet.c
@@ -797,7 +797,7 @@ static int dnet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 static void dnet_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 5bb6bb74c40e..38c282e6565b 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -730,7 +730,7 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		phy = priv->phy;
 	}
 
-	return phy_mii_ioctl(phy, mdio, cmd);
+	return phy_mii_ioctl(phy, ifr, cmd);
 }
 
 static int ethoc_config(struct net_device *dev, struct ifmap *map)
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 937f1b4a3483..391a553a3add 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -825,7 +825,7 @@ static int fec_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 static void fec_enet_free_buffers(struct net_device *dev)
diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
index 5f8346369b80..d1a5b17b2a95 100644
--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -826,7 +826,7 @@ static int mpc52xx_fec_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!priv->phydev)
 		return -ENOTSUPP;
 
-	return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static const struct net_device_ops mpc52xx_fec_netdev_ops = {
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index 309a0eaddd81..f08cff9020bd 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -963,12 +963,11 @@ static const struct ethtool_ops fs_ethtool_ops = {
 static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
-	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&rq->ifr_data;
 
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	return phy_mii_ioctl(fep->phydev, mii, cmd);
+	return phy_mii_ioctl(fep->phydev, rq, cmd);
 }
 
 extern int fs_mii_connect(struct net_device *dev);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index 746a776a1653..27f02970d898 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -847,7 +847,7 @@ static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static unsigned int reverse_bitmap(unsigned int bit_map, unsigned int max_qs)
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 40797fbdca9f..ff2f158ab0b9 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -1082,7 +1082,7 @@ static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 static const struct net_device_ops macb_netdev_ops = {
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index af075af20e0c..2fcdb1e1b99d 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -2461,7 +2461,7 @@ static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 
 	if (mp->phy != NULL)
-		return phy_mii_ioctl(mp->phy, if_mii(ifr), cmd);
+		return phy_mii_ioctl(mp->phy, ifr, cmd);
 
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index f4a0f08e14e1..b264f0f45605 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -620,7 +620,7 @@ static int octeon_mgmt_ioctl(struct net_device *netdev,
 	if (!p->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(p->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(p->phydev, rq, cmd);
 }
 
 static void octeon_mgmt_adjust_link(struct net_device *netdev)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 64be4664ccab..bd88d818f082 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -309,8 +309,9 @@ EXPORT_SYMBOL(phy_ethtool_gset);
  * current state.  Use at own risk.
  */
 int phy_mii_ioctl(struct phy_device *phydev,
-		struct mii_ioctl_data *mii_data, int cmd)
+		struct ifreq *ifr, int cmd)
 {
+	struct mii_ioctl_data *mii_data = if_mii(ifr);
 	u16 val = mii_data->val_in;
 
 	switch (cmd) {
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 79eee3062083..8e6bd45b9f31 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -2532,7 +2532,7 @@ static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!netif_running(dev) || !sc->phy_dev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(sc->phy_dev, if_mii(rq), cmd);
+	return phy_mii_ioctl(sc->phy_dev, rq, cmd);
 }
 
 static int sbmac_close(struct net_device *dev)
diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c
index 7ac814d932b1..32f2deaa38bb 100644
--- a/drivers/net/sh_eth.c
+++ b/drivers/net/sh_eth.c
@@ -1284,7 +1284,7 @@ static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq,
 	if (!phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
 #if defined(SH_ETH_HAS_TSU)
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index cc559741b0fa..56dc2ff75ee3 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1538,7 +1538,7 @@ static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (!netif_running(dev) || !pdata->phy_dev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(pdata->phy_dev, if_mii(ifr), cmd);
+	return phy_mii_ioctl(pdata->phy_dev, ifr, cmd);
 }
 
 static int
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index 6cdee6a15f9f..b09ee1c319e8 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -245,7 +245,7 @@ static int smsc9420_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	if (!netif_running(dev) || !pd->phy_dev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(pd->phy_dev, if_mii(ifr), cmd);
+	return phy_mii_ioctl(pd->phy_dev, ifr, cmd);
 }
 
 static int smsc9420_ethtool_get_settings(struct net_device *dev,
diff --git a/drivers/net/stmmac/stmmac_main.c b/drivers/net/stmmac/stmmac_main.c
index a31d580f306d..acf061686940 100644
--- a/drivers/net/stmmac/stmmac_main.c
+++ b/drivers/net/stmmac/stmmac_main.c
@@ -1437,24 +1437,18 @@ static void stmmac_poll_controller(struct net_device *dev)
 static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	int ret = -EOPNOTSUPP;
+	int ret;
 
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	switch (cmd) {
-	case SIOCGMIIPHY:
-	case SIOCGMIIREG:
-	case SIOCSMIIREG:
-		if (!priv->phydev)
-			return -EINVAL;
-
-		spin_lock(&priv->lock);
-		ret = phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
-		spin_unlock(&priv->lock);
-	default:
-		break;
-	}
+	if (!priv->phydev)
+		return -EINVAL;
+
+	spin_lock(&priv->lock);
+	ret = phy_mii_ioctl(priv->phydev, rq, cmd);
+	spin_unlock(&priv->lock);
+
 	return ret;
 }
 
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index be08b75dbc15..99e423a5b9f1 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -2066,7 +2066,7 @@ static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		return -EINVAL;
 	if (!lp->phy_dev)
 		return -ENODEV;
-	return phy_mii_ioctl(lp->phy_dev, if_mii(rq), cmd);
+	return phy_mii_ioctl(lp->phy_dev, rq, cmd);
 }
 
 static void tc35815_chip_reset(struct net_device *dev)
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 5769e1507d2f..b26a57782939 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -10932,7 +10932,7 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED))
 			return -EAGAIN;
 		phydev = tp->mdio_bus->phy_map[TG3_PHY_MII_ADDR];
-		return phy_mii_ioctl(phydev, data, cmd);
+		return phy_mii_ioctl(phydev, ifr, cmd);
 	}
 
 	switch (cmd) {
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index dc32a62e611c..e17dd7430915 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3714,7 +3714,7 @@ static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!ugeth->phydev)
 		return -ENODEV;
 
-	return phy_mii_ioctl(ugeth->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(ugeth->phydev, rq, cmd);
 }
 
 static const struct net_device_ops ucc_geth_netdev_ops = {
diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c
index 7e0be8d00dc3..10a82ef30215 100644
--- a/drivers/staging/octeon/ethernet-mdio.c
+++ b/drivers/staging/octeon/ethernet-mdio.c
@@ -113,7 +113,7 @@ int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	if (!priv->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
+	return phy_mii_ioctl(priv->phydev, rq, cmd);
 }
 
 static void cvm_oct_adjust_link(struct net_device *dev)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 987e111f7b11..d63736a84002 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -498,7 +498,7 @@ void phy_stop_machine(struct phy_device *phydev);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 int phy_mii_ioctl(struct phy_device *phydev,
-		struct mii_ioctl_data *mii_data, int cmd);
+		struct ifreq *ifr, int cmd);
 int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8fdca56bb08f..64ca2a6fa0d4 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -164,10 +164,9 @@ out:
 static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
-	struct mii_ioctl_data *mii_data = if_mii(ifr);
 
 	if (p->phy != NULL)
-		return phy_mii_ioctl(p->phy, mii_data, cmd);
+		return phy_mii_ioctl(p->phy, ifr, cmd);
 
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3-58-ga151


From c1f19b51d1d87f3e3bb7e6648f43f7d57ed2da6b Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sat, 17 Jul 2010 08:49:36 +0000
Subject: net: support time stamping in phy devices.

This patch adds a new networking option to allow hardware time stamps
from PHY devices. When enabled, likely candidates among incoming and
outgoing network packets are offered to the PHY driver for possible
time stamping. When accepted by the PHY driver, incoming packets are
deferred for later delivery by the driver.

The patch also adds phylib driver methods for the SIOCSHWTSTAMP ioctl
and callbacks for transmit and receive time stamping. Drivers may
optionally implement these functions.

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        |   5 ++
 drivers/net/phy/phy_device.c |   2 +
 include/linux/netdevice.h    |   4 ++
 include/linux/phy.h          |  22 ++++++++
 include/linux/skbuff.h       |  31 +++++++++++
 net/Kconfig                  |  10 ++++
 net/core/Makefile            |   2 +-
 net/core/dev.c               |   3 ++
 net/core/timestamping.c      | 126 +++++++++++++++++++++++++++++++++++++++++++
 net/socket.c                 |   4 ++
 10 files changed, 208 insertions(+), 1 deletion(-)
 create mode 100644 net/core/timestamping.c

(limited to 'net')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index bd88d818f082..5130db8f5c4e 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -361,6 +361,11 @@ int phy_mii_ioctl(struct phy_device *phydev,
 		}
 		break;
 
+	case SIOCSHWTSTAMP:
+		if (phydev->drv->hwtstamp)
+			return phydev->drv->hwtstamp(phydev, ifr);
+		/* fall through */
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 1a99bb244106..c0761197c07e 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -460,6 +460,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
 	}
 
 	phydev->attached_dev = dev;
+	dev->phydev = phydev;
 
 	phydev->dev_flags = flags;
 
@@ -513,6 +514,7 @@ EXPORT_SYMBOL(phy_attach);
  */
 void phy_detach(struct phy_device *phydev)
 {
+	phydev->attached_dev->phydev = NULL;
 	phydev->attached_dev = NULL;
 
 	/* If the device had no specific driver before (i.e. - it
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c4fedf000541..fdc3f2992230 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -54,6 +54,7 @@
 
 struct vlan_group;
 struct netpoll_info;
+struct phy_device;
 /* 802.11 specific */
 struct wireless_dev;
 					/* source back-compat hooks */
@@ -1065,6 +1066,9 @@ struct net_device {
 #endif
 	/* n-tuple filter list attached to this device */
 	struct ethtool_rx_ntuple_list ethtool_ntuple_list;
+
+	/* phy device may attach itself for hardware timestamping */
+	struct phy_device *phydev;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d63736a84002..6b0a782c6224 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -234,6 +234,8 @@ enum phy_state {
 	PHY_RESUMING
 };
 
+struct sk_buff;
+
 /* phy_device: An instance of a PHY
  *
  * drv: Pointer to the driver for this PHY instance
@@ -402,6 +404,26 @@ struct phy_driver {
 	/* Clears up any memory if needed */
 	void (*remove)(struct phy_device *phydev);
 
+	/* Handles SIOCSHWTSTAMP ioctl for hardware time stamping. */
+	int  (*hwtstamp)(struct phy_device *phydev, struct ifreq *ifr);
+
+	/*
+	 * Requests a Rx timestamp for 'skb'. If the skb is accepted,
+	 * the phy driver promises to deliver it using netif_rx() as
+	 * soon as a timestamp becomes available. One of the
+	 * PTP_CLASS_ values is passed in 'type'. The function must
+	 * return true if the skb is accepted for delivery.
+	 */
+	bool (*rxtstamp)(struct phy_device *dev, struct sk_buff *skb, int type);
+
+	/*
+	 * Requests a Tx timestamp for 'skb'. The phy driver promises
+	 * to deliver it to the socket's error queue as soon as a
+	 * timestamp becomes available. One of the PTP_CLASS_ values
+	 * is passed in 'type'.
+	 */
+	void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type);
+
 	struct device_driver driver;
 };
 #define to_phy_driver(d) container_of(d, struct phy_driver, driver)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a1b0400c8d86..f5aa87e1e0c8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1933,6 +1933,36 @@ static inline ktime_t net_invalid_timestamp(void)
 	return ktime_set(0, 0);
 }
 
+extern void skb_timestamping_init(void);
+
+#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
+
+extern void skb_clone_tx_timestamp(struct sk_buff *skb);
+extern bool skb_defer_rx_timestamp(struct sk_buff *skb);
+
+#else /* CONFIG_NETWORK_PHY_TIMESTAMPING */
+
+static inline void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+}
+
+static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+	return false;
+}
+
+#endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */
+
+/**
+ * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
+ *
+ * @skb: clone of the the original outgoing packet
+ * @hwtstamps: hardware time stamps
+ *
+ */
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+			       struct skb_shared_hwtstamps *hwtstamps);
+
 /**
  * skb_tstamp_tx - queue clone of skb with send time stamps
  * @orig_skb:	the original outgoing packet
@@ -1965,6 +1995,7 @@ static inline void sw_tx_timestamp(struct sk_buff *skb)
  */
 static inline void skb_tx_timestamp(struct sk_buff *skb)
 {
+	skb_clone_tx_timestamp(skb);
 	sw_tx_timestamp(skb);
 }
 
diff --git a/net/Kconfig b/net/Kconfig
index 0d68b40fc0e6..b3250944cde9 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -86,6 +86,16 @@ config NETWORK_SECMARK
 	  to nfmark, but designated for security purposes.
 	  If you are unsure how to answer this question, answer N.
 
+config NETWORK_PHY_TIMESTAMPING
+	bool "Timestamping in PHY devices"
+	depends on EXPERIMENTAL
+	help
+	  This allows timestamping of network packets by PHYs with
+	  hardware timestamping capabilities. This option adds some
+	  overhead in the transmit and receive paths.
+
+	  If you are unsure how to answer this question, answer N.
+
 menuconfig NETFILTER
 	bool "Network packet filtering framework (Netfilter)"
 	---help---
diff --git a/net/core/Makefile b/net/core/Makefile
index 51c3eec850ef..8a04dd22cf77 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,4 +18,4 @@ obj-$(CONFIG_NET_DMA) += user_dma.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
-
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
diff --git a/net/core/dev.c b/net/core/dev.c
index e2b9fa2c917e..1c002c7ef5d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2957,6 +2957,9 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (netdev_tstamp_prequeue)
 		net_timestamp_check(skb);
 
+	if (skb_defer_rx_timestamp(skb))
+		return NET_RX_SUCCESS;
+
 #ifdef CONFIG_RPS
 	{
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
new file mode 100644
index 000000000000..0ae6c22da85b
--- /dev/null
+++ b/net/core/timestamping.c
@@ -0,0 +1,126 @@
+/*
+ * PTP 1588 clock support - support for timestamping in PHY devices
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/errqueue.h>
+#include <linux/phy.h>
+#include <linux/ptp_classify.h>
+#include <linux/skbuff.h>
+
+static struct sock_filter ptp_filter[] = {
+	PTP_FILTER
+};
+
+static unsigned int classify(struct sk_buff *skb)
+{
+	if (likely(skb->dev &&
+		   skb->dev->phydev &&
+		   skb->dev->phydev->drv))
+		return sk_run_filter(skb, ptp_filter, ARRAY_SIZE(ptp_filter));
+	else
+		return PTP_CLASS_NONE;
+}
+
+void skb_clone_tx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phydev;
+	struct sk_buff *clone;
+	struct sock *sk = skb->sk;
+	unsigned int type;
+
+	if (!sk)
+		return;
+
+	type = classify(skb);
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		phydev = skb->dev->phydev;
+		if (likely(phydev->drv->txtstamp)) {
+			clone = skb_clone(skb, GFP_ATOMIC);
+			if (!clone)
+				return;
+			clone->sk = sk;
+			phydev->drv->txtstamp(phydev, clone, type);
+		}
+		break;
+	default:
+		break;
+	}
+}
+
+void skb_complete_tx_timestamp(struct sk_buff *skb,
+			       struct skb_shared_hwtstamps *hwtstamps)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+	int err;
+
+	if (!hwtstamps)
+		return;
+
+	*skb_hwtstamps(skb) = *hwtstamps;
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+	skb->sk = NULL;
+	err = sock_queue_err_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_tx_timestamp);
+
+bool skb_defer_rx_timestamp(struct sk_buff *skb)
+{
+	struct phy_device *phydev;
+	unsigned int type;
+
+	skb_push(skb, ETH_HLEN);
+
+	type = classify(skb);
+
+	skb_pull(skb, ETH_HLEN);
+
+	switch (type) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case PTP_CLASS_V2_VLAN:
+		phydev = skb->dev->phydev;
+		if (likely(phydev->drv->rxtstamp))
+			return phydev->drv->rxtstamp(phydev, skb, type);
+		break;
+	default:
+		break;
+	}
+
+	return false;
+}
+
+void __init skb_timestamping_init(void)
+{
+	BUG_ON(sk_chk_filter(ptp_filter, ARRAY_SIZE(ptp_filter)));
+}
diff --git a/net/socket.c b/net/socket.c
index 6fe484122a44..2270b941bcc7 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2394,6 +2394,10 @@ static int __init sock_init(void)
 	netfilter_init();
 #endif
 
+#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
+	skb_timestamping_init();
+#endif
+
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From 6accec76f6889f85dd62cadefe18afb8343558d1 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 18 Jul 2010 19:52:55 -0700
Subject: sch_atm: Convert to use standard list_head facilities.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_atm.c | 98 ++++++++++++++++++++++-------------------------------
 1 file changed, 41 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index fcbb86a486a2..e114f23d5eae 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -52,7 +52,7 @@ struct atm_flow_data {
 	int			ref;		/* reference count */
 	struct gnet_stats_basic_packed	bstats;
 	struct gnet_stats_queue	qstats;
-	struct atm_flow_data	*next;
+	struct list_head	list;
 	struct atm_flow_data	*excess;	/* flow for excess traffic;
 						   NULL to set CLP instead */
 	int			hdr_len;
@@ -61,34 +61,23 @@ struct atm_flow_data {
 
 struct atm_qdisc_data {
 	struct atm_flow_data	link;		/* unclassified skbs go here */
-	struct atm_flow_data	*flows;		/* NB: "link" is also on this
+	struct list_head	flows;		/* NB: "link" is also on this
 						   list */
 	struct tasklet_struct	task;		/* dequeue tasklet */
 };
 
 /* ------------------------- Class/flow operations ------------------------- */
 
-static int find_flow(struct atm_qdisc_data *qdisc, struct atm_flow_data *flow)
-{
-	struct atm_flow_data *walk;
-
-	pr_debug("find_flow(qdisc %p,flow %p)\n", qdisc, flow);
-	for (walk = qdisc->flows; walk; walk = walk->next)
-		if (walk == flow)
-			return 1;
-	pr_debug("find_flow: not found\n");
-	return 0;
-}
-
 static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow;
 
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list) {
 		if (flow->classid == classid)
-			break;
-	return flow;
+			return flow;
+	}
+	return NULL;
 }
 
 static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
@@ -99,7 +88,7 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
 
 	pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
 		sch, p, flow, new, old);
-	if (!find_flow(p, flow))
+	if (list_empty(&flow->list))
 		return -EINVAL;
 	if (!new)
 		new = &noop_qdisc;
@@ -146,20 +135,12 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 	struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-	struct atm_flow_data **prev;
 
 	pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
 	if (--flow->ref)
 		return;
 	pr_debug("atm_tc_put: destroying\n");
-	for (prev = &p->flows; *prev; prev = &(*prev)->next)
-		if (*prev == flow)
-			break;
-	if (!*prev) {
-		printk(KERN_CRIT "atm_tc_put: class %p not found\n", flow);
-		return;
-	}
-	*prev = flow->next;
+	list_del_init(&flow->list);
 	pr_debug("atm_tc_put: qdisc %p\n", flow->q);
 	qdisc_destroy(flow->q);
 	tcf_destroy_chain(&flow->filter_list);
@@ -274,7 +255,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 			error = -EINVAL;
 			goto err_out;
 		}
-		if (find_flow(p, flow)) {
+		if (!list_empty(&flow->list)) {
 			error = -EEXIST;
 			goto err_out;
 		}
@@ -313,8 +294,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
 	flow->classid = classid;
 	flow->ref = 1;
 	flow->excess = excess;
-	flow->next = p->link.next;
-	p->link.next = flow;
+	list_add(&flow->list, &p->link.list);
 	flow->hdr_len = hdr_len;
 	if (hdr)
 		memcpy(flow->hdr, hdr, hdr_len);
@@ -335,7 +315,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
 
 	pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
-	if (!find_flow(qdisc_priv(sch), flow))
+	if (list_empty(&flow->list))
 		return -EINVAL;
 	if (flow->filter_list || flow == &p->link)
 		return -EBUSY;
@@ -361,12 +341,12 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 	pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
 	if (walker->stop)
 		return;
-	for (flow = p->flows; flow; flow = flow->next) {
-		if (walker->count >= walker->skip)
-			if (walker->fn(sch, (unsigned long)flow, walker) < 0) {
-				walker->stop = 1;
-				break;
-			}
+	list_for_each_entry(flow, &p->flows, list) {
+		if (walker->count >= walker->skip &&
+		    walker->fn(sch, (unsigned long)flow, walker) < 0) {
+			walker->stop = 1;
+			break;
+		}
 		walker->count++;
 	}
 }
@@ -385,16 +365,17 @@ static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl)
 static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow = NULL;	/* @@@ */
+	struct atm_flow_data *flow;
 	struct tcf_result res;
 	int result;
 	int ret = NET_XMIT_POLICED;
 
 	pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
 	result = TC_POLICE_OK;	/* be nice to gcc */
+	flow = NULL;
 	if (TC_H_MAJ(skb->priority) != sch->handle ||
-	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority)))
-		for (flow = p->flows; flow; flow = flow->next)
+	    !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) {
+		list_for_each_entry(flow, &p->flows, list) {
 			if (flow->filter_list) {
 				result = tc_classify_compat(skb,
 							    flow->filter_list,
@@ -404,8 +385,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				flow = (struct atm_flow_data *)res.class;
 				if (!flow)
 					flow = lookup_flow(sch, res.classid);
-				break;
+				goto done;
 			}
+		}
+		flow = NULL;
+	done:
+		;		
+	}
 	if (!flow)
 		flow = &p->link;
 	else {
@@ -477,7 +463,9 @@ static void sch_atm_dequeue(unsigned long data)
 	struct sk_buff *skb;
 
 	pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->link.next; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list) {
+		if (flow == &p->link)
+			continue;
 		/*
 		 * If traffic is properly shaped, this won't generate nasty
 		 * little bursts. Otherwise, it may ... (but that's okay)
@@ -512,6 +500,7 @@ static void sch_atm_dequeue(unsigned long data)
 			/* atm.atm_options are already set by atm_tc_enqueue */
 			flow->vcc->send(flow->vcc, skb);
 		}
+	}
 }
 
 static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
@@ -543,9 +532,10 @@ static unsigned int atm_tc_drop(struct Qdisc *sch)
 	unsigned int len;
 
 	pr_debug("atm_tc_drop(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list) {
 		if (flow->q->ops->drop && (len = flow->q->ops->drop(flow->q)))
 			return len;
+	}
 	return 0;
 }
 
@@ -554,7 +544,9 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 	struct atm_qdisc_data *p = qdisc_priv(sch);
 
 	pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
-	p->flows = &p->link;
+	INIT_LIST_HEAD(&p->flows);
+	INIT_LIST_HEAD(&p->link.list);
+	list_add(&p->link.list, &p->flows);
 	p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 				      &pfifo_qdisc_ops, sch->handle);
 	if (!p->link.q)
@@ -565,7 +557,6 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
 	p->link.sock = NULL;
 	p->link.classid = sch->handle;
 	p->link.ref = 1;
-	p->link.next = NULL;
 	tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
 	return 0;
 }
@@ -576,7 +567,7 @@ static void atm_tc_reset(struct Qdisc *sch)
 	struct atm_flow_data *flow;
 
 	pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list)
 		qdisc_reset(flow->q);
 	sch->q.qlen = 0;
 }
@@ -584,24 +575,17 @@ static void atm_tc_reset(struct Qdisc *sch)
 static void atm_tc_destroy(struct Qdisc *sch)
 {
 	struct atm_qdisc_data *p = qdisc_priv(sch);
-	struct atm_flow_data *flow;
+	struct atm_flow_data *flow, *tmp;
 
 	pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
-	for (flow = p->flows; flow; flow = flow->next)
+	list_for_each_entry(flow, &p->flows, list)
 		tcf_destroy_chain(&flow->filter_list);
 
-	/* races ? */
-	while ((flow = p->flows)) {
+	list_for_each_entry_safe(flow, tmp, &p->flows, list) {
 		if (flow->ref > 1)
 			printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow,
 			       flow->ref);
 		atm_tc_put(sch, (unsigned long)flow);
-		if (p->flows == flow) {
-			printk(KERN_ERR "atm_destroy: putting flow %p didn't "
-			       "kill it\n", flow);
-			p->flows = flow->next;	/* brute force */
-			break;
-		}
 	}
 	tasklet_kill(&p->task);
 }
@@ -615,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
 
 	pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
 		sch, p, flow, skb, tcm);
-	if (!find_flow(p, flow))
+	if (list_empty(&flow->list))
 		return -EINVAL;
 	tcm->tcm_handle = flow->classid;
 	tcm->tcm_info = flow->q->handle;
-- 
cgit v1.2.3-58-ga151


From bd27290a593f80cb99e95287cb29c72c0d57608b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Jul 2010 09:35:40 -0700
Subject: net: 64bit stats for netdev_queue

Since struct netdev_queue tx_bytes/tx_packets/tx_dropped are already
protected by _xmit_lock, its easy to convert these fields to u64 instead
of unsigned long.
This completes 64bit stats for devices using them (vlan, macvlan, ...)

Strictly, we could avoid the locking in dev_txq_stats_fold() on 64bit
arches, but its slow path and we prefer keep it simple.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 +++---
 net/core/dev.c            | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fdc3f2992230..b6262898ece0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -501,9 +501,9 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
-	unsigned long		tx_bytes;
-	unsigned long		tx_packets;
-	unsigned long		tx_dropped;
+	u64			tx_bytes;
+	u64			tx_packets;
+	u64			tx_dropped;
 } ____cacheline_aligned_in_smp;
 
 #ifdef CONFIG_RPS
diff --git a/net/core/dev.c b/net/core/dev.c
index 1c002c7ef5d5..9de75cdade56 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5282,15 +5282,17 @@ void netdev_run_todo(void)
 void dev_txq_stats_fold(const struct net_device *dev,
 			struct rtnl_link_stats64 *stats)
 {
-	unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
+	u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
 	unsigned int i;
 	struct netdev_queue *txq;
 
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		txq = netdev_get_tx_queue(dev, i);
+		spin_lock_bh(&txq->_xmit_lock);
 		tx_bytes   += txq->tx_bytes;
 		tx_packets += txq->tx_packets;
 		tx_dropped += txq->tx_dropped;
+		spin_unlock_bh(&txq->_xmit_lock);
 	}
 	if (tx_bytes || tx_packets || tx_dropped) {
 		stats->tx_bytes   = tx_bytes;
-- 
cgit v1.2.3-58-ga151


From 45e77d314585869dfe43c82679f7e08c9b35b898 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 19 Jul 2010 01:16:18 +0000
Subject: tcp: fix crash in tcp_xmit_retransmit_queue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It can happen that there are no packets in queue while calling
tcp_xmit_retransmit_queue(). tcp_write_queue_head() then returns
NULL and that gets deref'ed to get sacked into a local var.

There is no work to do if no packets are outstanding so we just
exit early.

This oops was introduced by 08ebd1721ab8fd (tcp: remove tp->lost_out
guard to make joining diff nicer).

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Reported-by: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>
Tested-by: Lennart Schulte <lennart.schulte@nets.rwth-aachen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b4ed957f201a..7ed9dc1042d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2208,6 +2208,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	int mib_idx;
 	int fwd_rexmitting = 0;
 
+	if (!tp->packets_out)
+		return;
+
 	if (!tp->lost_out)
 		tp->retransmit_high = tp->snd_una;
 
-- 
cgit v1.2.3-58-ga151


From d6d9ca0fec6aea0f2e4064474a1c5cdbed873c63 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Jul 2010 10:48:49 +0000
Subject: net: this_cpu_xxx conversions

Use modern this_cpu_xxx() api, saving few bytes on x86

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c         | 5 ++---
 net/sched/sch_generic.c | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/sock.c b/net/core/sock.c
index 363bc260157c..b05b9b6ddb87 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2232,8 +2232,7 @@ static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
 #ifdef CONFIG_NET_NS
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-	int cpu = smp_processor_id();
-	per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
@@ -2279,7 +2278,7 @@ static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
 
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-	__get_cpu_var(prot_inuse).val[prot->inuse_idx] += val;
+	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d20fcd2a5519..2aeb3a4386a1 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -96,7 +96,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 * Another cpu is holding lock, requeue & delay xmits for
 		 * some time.
 		 */
-		__get_cpu_var(softnet_data).cpu_collision++;
+		__this_cpu_inc(softnet_data.cpu_collision);
 		ret = dev_requeue_skb(skb, q);
 	}
 
-- 
cgit v1.2.3-58-ga151


From 573201f36fd9c7c6d5218cdcd9948cee700b277d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 19 Jul 2010 19:26:45 +0000
Subject: bridge: Partially disable netpoll support

The new netpoll code in bridging contains use-after-free bugs
that are non-trivial to fix.

This patch fixes this by removing the code that uses skbs after
they're freed.

As a consequence, this means that we can no longer call bridge
from the netpoll path, so this patch also removes the controller
function in order to disable netpoll.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Thanks,
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  |  9 ---------
 net/bridge/br_forward.c | 23 +----------------------
 2 files changed, 1 insertion(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index eedf2c94820e..753fc4221f3c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -217,14 +217,6 @@ static bool br_devices_support_netpoll(struct net_bridge *br)
 	return count != 0 && ret;
 }
 
-static void br_poll_controller(struct net_device *br_dev)
-{
-	struct netpoll *np = br_dev->npinfo->netpoll;
-
-	if (np->real_dev != br_dev)
-		netpoll_poll_dev(np->real_dev);
-}
-
 void br_netpoll_cleanup(struct net_device *dev)
 {
 	struct net_bridge *br = netdev_priv(dev);
@@ -295,7 +287,6 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_do_ioctl		 = br_dev_ioctl,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
-	.ndo_poll_controller	 = br_poll_controller,
 #endif
 };
 
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index a4e72a89e4ff..595da45f9088 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -50,14 +50,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 			kfree_skb(skb);
 		else {
 			skb_push(skb, ETH_HLEN);
-
-#ifdef CONFIG_NET_POLL_CONTROLLER
-			if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
-				netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
-				skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
-			} else
-#endif
-				dev_queue_xmit(skb);
+			dev_queue_xmit(skb);
 		}
 	}
 
@@ -73,23 +66,9 @@ int br_forward_finish(struct sk_buff *skb)
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	struct net_bridge *br = to->br;
-	if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
-		struct netpoll *np;
-		to->dev->npinfo = skb->dev->npinfo;
-		np = skb->dev->npinfo->netpoll;
-		np->real_dev = np->dev = to->dev;
-		to->dev->priv_flags |= IFF_IN_NETPOLL;
-	}
-#endif
 	skb->dev = to->dev;
 	NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 		br_forward_finish);
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	if (skb->dev->npinfo)
-		skb->dev->npinfo->netpoll->dev = br->dev;
-#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
-- 
cgit v1.2.3-58-ga151


From d79d991379af35d43f003f162e8650e72965b8ca Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 19 Jul 2010 23:51:38 +0000
Subject: __dst_free(): put EXPORT_SYMBOLS after the fct

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index 9920722cc82b..6c41b1fac3db 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -197,7 +197,6 @@ static void ___dst_free(struct dst_entry *dst)
 		dst->input = dst->output = dst_discard;
 	dst->obsolete = 2;
 }
-EXPORT_SYMBOL(__dst_free);
 
 void __dst_free(struct dst_entry *dst)
 {
@@ -213,6 +212,7 @@ void __dst_free(struct dst_entry *dst)
 	}
 	spin_unlock_bh(&dst_garbage.lock);
 }
+EXPORT_SYMBOL(__dst_free);
 
 struct dst_entry *dst_destroy(struct dst_entry * dst)
 {
-- 
cgit v1.2.3-58-ga151


From 4b706372f18de53970e4c6887a96459590fef80a Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 20 Jul 2010 04:52:09 +0000
Subject: drop_monitor: Add error code to detect duplicate state changes

	Patch to add -EAGAIN error to dropwatch netlink message handling code.
-EAGAIN will be returned anytime userspace attempts to transition the state of
the drop monitor service to a state that its already in.  That allows user space
to detect this condition, so it doesn't wait for a success ACK that will never
arrive.  Tested successfully by me

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/drop_monitor.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ad41529fb60f..646ef3bc7200 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -223,6 +223,11 @@ static int set_all_monitor_traces(int state)
 
 	spin_lock(&trace_state_lock);
 
+	if (state == trace_state) {
+		rc = -EAGAIN;
+		goto out_unlock;
+	}
+
 	switch (state) {
 	case TRACE_ON:
 		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
@@ -251,11 +256,12 @@ static int set_all_monitor_traces(int state)
 
 	if (!rc)
 		trace_state = state;
+	else
+		rc = -EINPROGRESS;
 
+out_unlock:
 	spin_unlock(&trace_state_lock);
 
-	if (rc)
-		return -EINPROGRESS;
 	return rc;
 }
 
-- 
cgit v1.2.3-58-ga151


From 70d4bf6d467a330ccc947df9b2608e329d9e7708 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 20 Jul 2010 06:45:56 +0000
Subject: drop_monitor: convert some kfree_skb call sites to consume_skb

Convert a few calls from kfree_skb to consume_skb

Noticed while I was working on dropwatch that I was detecting lots of internal
skb drops in several places.  While some are legitimate, several were not,
freeing skbs that were at the end of their life, rather than being discarded due
to an error.  This patch converts those calls sites from using kfree_skb to
consume_skb, which quiets the in-kernel drop_monitor code from detecting them as
drops.  Tested successfully by myself

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/audit.c           | 2 +-
 net/netlink/af_netlink.c | 9 +++++----
 net/unix/af_unix.c       | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/kernel/audit.c b/kernel/audit.c
index c71bd26631a2..8296aa516c5a 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -407,7 +407,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
 		audit_hold_skb(skb);
 	} else
 		/* drop the extra reference if sent ok */
-		kfree_skb(skb);
+		consume_skb(skb);
 }
 
 static int kauditd_thread(void *dummy)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7aeaa83193db..8648a9922aab 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1076,14 +1076,15 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
 		do_one_broadcast(sk, &info);
 
-	kfree_skb(skb);
+	consume_skb(skb);
 
 	netlink_unlock_table();
 
-	kfree_skb(info.skb2);
-
-	if (info.delivery_failure)
+	if (info.delivery_failure) {
+		kfree_skb(info.skb2);
 		return -ENOBUFS;
+	} else
+		consume_skb(info.skb2);
 
 	if (info.delivered) {
 		if (info.congested && (allocation & __GFP_WAIT))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 75ba48b0d12a..4414a18c63b4 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1906,7 +1906,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
 				break;
 			}
 
-			kfree_skb(skb);
+			consume_skb(skb);
 
 			if (siocb->scm->fp)
 				break;
-- 
cgit v1.2.3-58-ga151


From fd059b9bd0af2e217f29d5a34e1c71039d26af8c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 10 May 2010 14:22:56 -0300
Subject: Bluetooth: Remove max_tx and tx_window module paramenters from L2CAP

We don't need these parameters anymore since we have socket options for
them.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cf3c4073a8a6..fd507cbc7932 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -60,8 +60,6 @@ static int enable_ertm = 1;
 #else
 static int enable_ertm = 0;
 #endif
-static int max_transmit = L2CAP_DEFAULT_MAX_TX;
-static int tx_window = L2CAP_DEFAULT_TX_WINDOW;
 
 static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
@@ -816,9 +814,9 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 			pi->mode = L2CAP_MODE_ERTM;
 		else
 			pi->mode = L2CAP_MODE_BASIC;
-		pi->max_tx = max_transmit;
+		pi->max_tx = L2CAP_DEFAULT_MAX_TX;
 		pi->fcs  = L2CAP_FCS_CRC16;
-		pi->tx_win = tx_window;
+		pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
 		pi->sec_level = BT_SECURITY_LOW;
 		pi->role_switch = 0;
 		pi->force_reliable = 0;
@@ -4682,12 +4680,6 @@ module_exit(l2cap_exit);
 module_param(enable_ertm, bool, 0644);
 MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode");
 
-module_param(max_transmit, uint, 0644);
-MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)");
-
-module_param(tx_window, uint, 0644);
-MODULE_PARM_DESC(tx_window, "Transmission window size value (default = 63)");
-
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
 MODULE_VERSION(VERSION);
-- 
cgit v1.2.3-58-ga151


From 0b31c85ce78d3646ac1e90d62969e7cd8cfd8b15 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 10 May 2010 14:26:11 -0300
Subject: Bluetooth: Remove L2CAP Extended Features from Kconfig

This reverts commit 84fb0a6334af0ccad3544f6972c055d90fbb9fbe which adds
the L2CAP Extended Features to the Kconfig, that is actually not needed.
One can use other mechanisms to enable L2CAP Extended Features.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/Kconfig | 13 -------------
 net/bluetooth/l2cap.c |  4 ----
 2 files changed, 17 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index ee3b3049d385..ed371684c133 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -43,19 +43,6 @@ config BT_L2CAP
 	  Say Y here to compile L2CAP support into the kernel or say M to
 	  compile it as module (l2cap).
 
-config BT_L2CAP_EXT_FEATURES
-	bool "L2CAP Extended Features support (EXPERIMENTAL)"
-	depends on BT_L2CAP && EXPERIMENTAL
-	help
-	  This option enables the L2CAP Extended Features support. These
-	  new features include the Enhanced Retransmission and Streaming
-	  Modes, the Frame Check Sequence (FCS), and Segmentation and
-	  Reassembly (SAR) for L2CAP packets. They are a required for the
-	  new Alternate MAC/PHY and the Bluetooth Medical Profile.
-
-	  You should say N unless you know what you are doing. Note that
-	  this is in an experimental state yet.
-
 config BT_SCO
 	tristate "SCO links support"
 	depends on BT
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index fd507cbc7932..6c5462b2a244 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -55,11 +55,7 @@
 
 #define VERSION "2.14"
 
-#ifdef CONFIG_BT_L2CAP_EXT_FEATURES
-static int enable_ertm = 1;
-#else
 static int enable_ertm = 0;
-#endif
 
 static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
-- 
cgit v1.2.3-58-ga151


From f6337c771126420c348b702e012262cfb5f0d56e Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 10 May 2010 18:32:04 -0300
Subject: Bluetooth: Fix drop of packets with invalid req_seq/tx_seq

We shall not use an unsigned var since we are expecting negatives value
there. Using unsigned causes ERTM connection to close due to invalid
ReqSeq numbers.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6c5462b2a244..7e39d9ee0b9c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3796,7 +3796,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 	u8 tx_seq = __get_txseq(rx_control);
 	u8 req_seq = __get_reqseq(rx_control);
 	u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
-	u8 tx_seq_offset, expected_tx_seq_offset;
+	int tx_seq_offset, expected_tx_seq_offset;
 	int num_to_ack = (pi->tx_win/6) + 1;
 	int err = 0;
 
@@ -4081,7 +4081,8 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 	struct sock *sk;
 	struct l2cap_pinfo *pi;
 	u16 control, len;
-	u8 tx_seq, req_seq, next_tx_seq_offset, req_seq_offset;
+	u8 tx_seq, req_seq;
+	int next_tx_seq_offset, req_seq_offset;
 
 	sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
 	if (!sk) {
-- 
cgit v1.2.3-58-ga151


From 8ff50ec04a7ecdbba6e0a5423cf6f2c5f7fc605e Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 10 May 2010 19:34:11 -0300
Subject: Bluetooth: Fix bug with ERTM vars increment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All ERTM operations regarding the txWindow should be modulo 64,
otherwise we confuse the ERTM logic and connections will break.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7e39d9ee0b9c..a9fdfe401f5b 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3748,7 +3748,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
 		l2cap_ertm_reassembly_sdu(sk, skb, control);
 		l2cap_pi(sk)->buffer_seq_srej =
 			(l2cap_pi(sk)->buffer_seq_srej + 1) % 64;
-		tx_seq++;
+		tx_seq = (tx_seq + 1) % 64;
 	}
 }
 
@@ -3784,10 +3784,11 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq)
 		l2cap_send_sframe(pi, control);
 
 		new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC);
-		new->tx_seq = pi->expected_tx_seq++;
+		new->tx_seq = pi->expected_tx_seq;
+		pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
 		list_add_tail(&new->list, SREJ_LIST(sk));
 	}
-	pi->expected_tx_seq++;
+	pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
 }
 
 static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
-- 
cgit v1.2.3-58-ga151


From bc1b1f8bee63966649dd5ac7d10d31a6556bf19b Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 11 May 2010 22:14:00 -0300
Subject: Bluetooth: Only check SAR bits if frame is an I-frame

The SAR bits doesn't make sense for an S-frame. It doesn't use SAR.

Checking SAR for a S-frames can lead to L2CAP errors, it could close
the channel with an invalid packet length, since we was removing the 2
of the of any frame that match SAR start bits, without check if it is
an I-frame.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index a9fdfe401f5b..108c2f290ac5 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -4117,7 +4117,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		skb_pull(skb, 2);
 		len = skb->len;
 
-		if (__is_sar_start(control))
+		if (__is_sar_start(control) && __is_iframe(control))
 			len -= 2;
 
 		if (pi->fcs == L2CAP_FCS_CRC16)
-- 
cgit v1.2.3-58-ga151


From 6e2b6722abaa3f6042357e11f465488b7c12f94c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 1 Jun 2010 18:52:58 -0300
Subject: Bluetooth: Fix bug in l2cap_ertm_send() behavior
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch makes l2cap_ertm_send() similar to the Send-Data action of
the ERTM spec. We shall not check for RemoteBusy or WAIT_F state
inside l2cap_ertm_send().

Such checks were causing a bug in the retransmission logic of ERTM and
making ERTM stalls until the ACL is dropped.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 108c2f290ac5..69f098d98141 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1415,11 +1415,8 @@ static int l2cap_ertm_send(struct sock *sk)
 	u16 control, fcs;
 	int nsent = 0;
 
-	if (pi->conn_state & L2CAP_CONN_WAIT_F)
-		return 0;
 
-	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) &&
-			!(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) {
+	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) {
 
 		if (pi->remote_max_tx &&
 				bt_cb(skb)->retries == pi->remote_max_tx) {
@@ -1792,6 +1789,11 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		if (pi->mode == L2CAP_MODE_STREAMING) {
 			err = l2cap_streaming_send(sk);
 		} else {
+			if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY &&
+					pi->conn_state && L2CAP_CONN_WAIT_F) {
+				err = len;
+				break;
+			}
 			spin_lock_bh(&pi->send_lock);
 			err = l2cap_ertm_send(sk);
 			spin_unlock_bh(&pi->send_lock);
@@ -3378,8 +3380,6 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0)
 		__mod_retrans_timer();
 
-	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-
 	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
 	spin_unlock_bh(&pi->send_lock);
-- 
cgit v1.2.3-58-ga151


From bfbacc11550a785caf082f3ccfcd7ecf882e09a4 Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Mon, 31 May 2010 18:35:44 -0300
Subject: Bluetooth: Fix SREJ_QUEUE corruption in L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since all TxSeq values are modulo, we shall not compare them directly. We
have to compare their offset inside the TxWindow instead.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 69f098d98141..b89762134e4e 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3394,6 +3394,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
 {
 	struct sk_buff *next_skb;
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int tx_seq_offset, next_tx_seq_offset;
 
 	bt_cb(skb)->tx_seq = tx_seq;
 	bt_cb(skb)->sar = sar;
@@ -3404,11 +3406,20 @@ static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_s
 		return 0;
 	}
 
+	tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
+	if (tx_seq_offset < 0)
+		tx_seq_offset += 64;
+
 	do {
 		if (bt_cb(next_skb)->tx_seq == tx_seq)
 			return -EINVAL;
 
-		if (bt_cb(next_skb)->tx_seq > tx_seq) {
+		next_tx_seq_offset = (bt_cb(next_skb)->tx_seq -
+						pi->buffer_seq) % 64;
+		if (next_tx_seq_offset < 0)
+			next_tx_seq_offset += 64;
+
+		if (next_tx_seq_offset > tx_seq_offset) {
 			__skb_queue_before(SREJ_QUEUE(sk), next_skb, skb);
 			return 0;
 		}
-- 
cgit v1.2.3-58-ga151


From 51893f88dd916efead5e24a212c907b2cd35e160 Mon Sep 17 00:00:00 2001
From: Nathan Holstein <nathan@lampreynetworks.com>
Date: Wed, 9 Jun 2010 15:46:25 -0400
Subject: Bluetooth: Fix bug with ERTM minimum packet length

ERTM and streaming mode L2CAP sockets have no minimum packet length. Only
basic mode connections have minimum length.

Instead, validate the packet containing all necessary control, FCS,
and SAR fields.

The patch fixes the drop of valid packets with length lower than 4.

Signed-off-by: Nathan Holstein <ngh@isomerica.net>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index b89762134e4e..4af8fc0d512c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -4092,9 +4092,9 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 {
 	struct sock *sk;
 	struct l2cap_pinfo *pi;
-	u16 control, len;
+	u16 control;
 	u8 tx_seq, req_seq;
-	int next_tx_seq_offset, req_seq_offset;
+	int len, next_tx_seq_offset, req_seq_offset;
 
 	sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
 	if (!sk) {
@@ -4164,7 +4164,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		}
 
 		if (__is_iframe(control)) {
-			if (len < 4) {
+			if (len < 0) {
 				l2cap_send_disconn_req(pi->conn, sk);
 				goto drop;
 			}
@@ -4192,7 +4192,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (pi->fcs == L2CAP_FCS_CRC16)
 			len -= 2;
 
-		if (len > pi->mps || len < 4 || __is_sframe(control))
+		if (len > pi->mps || len < 0 || __is_sframe(control))
 			goto drop;
 
 		if (l2cap_check_fcs(pi, skb))
-- 
cgit v1.2.3-58-ga151


From c13ffa620f15cb28d18268a773464cf51550fa9f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Thu, 13 May 2010 20:50:12 -0300
Subject: Bluetooth: Proper shutdown ERTM when closing the channel

Fix a crash regarding the Monitor Timeout, it was running even after the
shutdown of the ACL connection, which doesn't make sense.

The same code also fixes another issue, before this patch L2CAP was sending
many Disconnections Requests while we have to send only one.

The issues are related to each other, a expired Monitor Timeout can
trigger a Disconnection Request and then we may have a crash if the link
was already deleted.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 59 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 37 insertions(+), 22 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4af8fc0d512c..4415eb48c6ae 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -272,6 +272,24 @@ static void l2cap_chan_del(struct sock *sk, int err)
 		parent->sk_data_ready(parent, 0);
 	} else
 		sk->sk_state_change(sk);
+
+	skb_queue_purge(TX_QUEUE(sk));
+
+	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
+		struct srej_list *l, *tmp;
+
+		del_timer(&l2cap_pi(sk)->retrans_timer);
+		del_timer(&l2cap_pi(sk)->monitor_timer);
+		del_timer(&l2cap_pi(sk)->ack_timer);
+
+		skb_queue_purge(SREJ_QUEUE(sk));
+		skb_queue_purge(BUSY_QUEUE(sk));
+
+		list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
+			list_del(&l->list);
+			kfree(l);
+		}
+	}
 }
 
 /* Service level security */
@@ -345,8 +363,12 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 	struct sk_buff *skb;
 	struct l2cap_hdr *lh;
 	struct l2cap_conn *conn = pi->conn;
+	struct sock *sk = (struct sock *)pi;
 	int count, hlen = L2CAP_HDR_SIZE + 2;
 
+	if (sk->sk_state != BT_CONNECTED)
+		return;
+
 	if (pi->fcs == L2CAP_FCS_CRC16)
 		hlen += 2;
 
@@ -438,10 +460,23 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk)
 {
 	struct l2cap_disconn_req req;
 
+	if (!conn)
+		return;
+
+	skb_queue_purge(TX_QUEUE(sk));
+
+	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
+		del_timer(&l2cap_pi(sk)->retrans_timer);
+		del_timer(&l2cap_pi(sk)->monitor_timer);
+		del_timer(&l2cap_pi(sk)->ack_timer);
+	}
+
 	req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid);
 	req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 	l2cap_send_cmd(conn, l2cap_get_ident(conn),
 			L2CAP_DISCONN_REQ, sizeof(req), &req);
+
+	sk->sk_state = BT_DISCONN;
 }
 
 /* ---- L2CAP connections ---- */
@@ -734,7 +769,6 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 				sk->sk_type == SOCK_STREAM) {
 			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 
-			sk->sk_state = BT_DISCONN;
 			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 			l2cap_send_disconn_req(conn, sk);
 		} else
@@ -1415,6 +1449,8 @@ static int l2cap_ertm_send(struct sock *sk)
 	u16 control, fcs;
 	int nsent = 0;
 
+	if (sk->sk_state != BT_CONNECTED)
+		return -ENOTCONN;
 
 	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) {
 
@@ -3072,7 +3108,6 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 		}
 
 	default:
-		sk->sk_state = BT_DISCONN;
 		sk->sk_err = ECONNRESET;
 		l2cap_sock_set_timer(sk, HZ * 5);
 		l2cap_send_disconn_req(conn, sk);
@@ -3126,16 +3161,6 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
-	skb_queue_purge(TX_QUEUE(sk));
-
-	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
-		skb_queue_purge(SREJ_QUEUE(sk));
-		skb_queue_purge(BUSY_QUEUE(sk));
-		del_timer(&l2cap_pi(sk)->retrans_timer);
-		del_timer(&l2cap_pi(sk)->monitor_timer);
-		del_timer(&l2cap_pi(sk)->ack_timer);
-	}
-
 	l2cap_chan_del(sk, ECONNRESET);
 	bh_unlock_sock(sk);
 
@@ -3158,16 +3183,6 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd
 	if (!sk)
 		return 0;
 
-	skb_queue_purge(TX_QUEUE(sk));
-
-	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
-		skb_queue_purge(SREJ_QUEUE(sk));
-		skb_queue_purge(BUSY_QUEUE(sk));
-		del_timer(&l2cap_pi(sk)->retrans_timer);
-		del_timer(&l2cap_pi(sk)->monitor_timer);
-		del_timer(&l2cap_pi(sk)->ack_timer);
-	}
-
 	l2cap_chan_del(sk, 0);
 	bh_unlock_sock(sk);
 
-- 
cgit v1.2.3-58-ga151


From 95ffa97827371ede501615d9bd048eb5b49e8fe1 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Fri, 18 Jun 2010 20:37:33 -0300
Subject: Bluetooth: Fix L2CAP control bit field corruption

When resending an I-frame, ERTM was reusing the control bits from the last
time it was sent, that was causing a corruption in the new control field
due to it dirty fields.

This patches extracts only the SAR bits from the old field and reuse it to
resend the packet, the others bits should be reset and receive the
updated value.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4415eb48c6ae..c2fb26d9286c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1430,6 +1430,8 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 	tx_skb = skb_clone(skb, GFP_ATOMIC);
 	bt_cb(skb)->retries++;
 	control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+	control &= L2CAP_CTRL_SAR;
+
 	control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
 			| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
 	put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
@@ -1465,6 +1467,8 @@ static int l2cap_ertm_send(struct sock *sk)
 		bt_cb(skb)->retries++;
 
 		control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+		control &= L2CAP_CTRL_SAR;
+
 		if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
 			control |= L2CAP_CTRL_FINAL;
 			pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
-- 
cgit v1.2.3-58-ga151


From f03585689fdff4ae256edd45a35bc2dd83d3684a Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Tue, 18 May 2010 13:20:32 +0200
Subject: Bluetooth: Add blacklist support for incoming connections

In some circumstances it could be desirable to reject incoming
connections on the baseband level. This patch adds this feature through
two new ioctl's: HCIBLOCKADDR and HCIUNBLOCKADDR. Both take a simple
Bluetooth address as a parameter. BDADDR_ANY can be used with
HCIUNBLOCKADDR to remove all devices from the blacklist.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 fs/compat_ioctl.c                |  2 +
 include/net/bluetooth/hci.h      |  3 ++
 include/net/bluetooth/hci_core.h |  9 ++++
 net/bluetooth/hci_core.c         |  3 ++
 net/bluetooth/hci_event.c        |  2 +-
 net/bluetooth/hci_sock.c         | 90 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 108 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 641640dc7ae5..18638969a659 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1328,6 +1328,8 @@ COMPATIBLE_IOCTL(HCISETLINKPOL)
 COMPATIBLE_IOCTL(HCISETLINKMODE)
 COMPATIBLE_IOCTL(HCISETACLMTU)
 COMPATIBLE_IOCTL(HCISETSCOMTU)
+COMPATIBLE_IOCTL(HCIBLOCKADDR)
+COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index fc0c502d9fd1..ca2518e0574e 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -100,6 +100,9 @@ enum {
 #define HCISETACLMTU	_IOW('H', 227, int)
 #define HCISETSCOMTU	_IOW('H', 228, int)
 
+#define HCIBLOCKADDR	_IOW('H', 230, int)
+#define HCIUNBLOCKADDR	_IOW('H', 231, int)
+
 #define HCIINQUIRY	_IOR('H', 240, int)
 
 /* HCI timeouts */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e42f6ed5421c..ffc637748b87 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -62,6 +62,11 @@ struct hci_conn_hash {
 	unsigned int     sco_num;
 };
 
+struct bdaddr_list {
+	struct list_head list;
+	bdaddr_t bdaddr;
+};
+
 struct hci_dev {
 	struct list_head list;
 	spinlock_t	lock;
@@ -127,6 +132,7 @@ struct hci_dev {
 
 	struct inquiry_cache	inq_cache;
 	struct hci_conn_hash	conn_hash;
+	struct bdaddr_list	blacklist;
 
 	struct hci_dev_stats	stat;
 
@@ -424,6 +430,9 @@ int hci_get_conn_info(struct hci_dev *hdev, void __user *arg);
 int hci_get_auth_info(struct hci_dev *hdev, void __user *arg);
 int hci_inquiry(void __user *arg);
 
+struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr);
+int hci_blacklist_clear(struct hci_dev *hdev);
+
 void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct sk_buff *skb);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2f768de87011..aeb2982310a0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,6 +562,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_dev_lock_bh(hdev);
 	inquiry_cache_flush(hdev);
 	hci_conn_hash_flush(hdev);
+	hci_blacklist_clear(hdev);
 	hci_dev_unlock_bh(hdev);
 
 	hci_notify(hdev, HCI_DEV_DOWN);
@@ -923,6 +924,8 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	hci_conn_hash_init(hdev);
 
+	INIT_LIST_HEAD(&hdev->blacklist.list);
+
 	memset(&hdev->stat, 0, sizeof(struct hci_dev_stats));
 
 	atomic_set(&hdev->promisc, 0);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 786b5de0bac4..43feeef3c498 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -952,7 +952,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 
 	mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type);
 
-	if (mask & HCI_LM_ACCEPT) {
+	if ((mask & HCI_LM_ACCEPT) && !hci_blacklist_lookup(hdev, &ev->bdaddr)) {
 		/* Connection accepted */
 		struct inquiry_entry *ie;
 		struct hci_conn *conn;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 38f08f6b86f6..4f170a595934 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -165,6 +165,86 @@ static int hci_sock_release(struct socket *sock)
 	return 0;
 }
 
+struct bdaddr_list *hci_blacklist_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr)
+{
+	struct list_head *p;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+
+	list_for_each(p, &blacklist->list) {
+		struct bdaddr_list *b;
+
+		b = list_entry(p, struct bdaddr_list, list);
+
+		if (bacmp(bdaddr, &b->bdaddr) == 0)
+			return b;
+	}
+
+	return NULL;
+}
+
+static int hci_blacklist_add(struct hci_dev *hdev, void __user *arg)
+{
+	bdaddr_t bdaddr;
+	struct bdaddr_list *entry;
+
+	if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
+		return -EFAULT;
+
+	if (bacmp(&bdaddr, BDADDR_ANY) == 0)
+		return -EBADF;
+
+	if (hci_blacklist_lookup(hdev, &bdaddr))
+		return -EEXIST;
+
+	entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	bacpy(&entry->bdaddr, &bdaddr);
+
+	list_add(&entry->list, &hdev->blacklist.list);
+
+	return 0;
+}
+
+int hci_blacklist_clear(struct hci_dev *hdev)
+{
+	struct list_head *p, *n;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+
+	list_for_each_safe(p, n, &blacklist->list) {
+		struct bdaddr_list *b;
+
+		b = list_entry(p, struct bdaddr_list, list);
+
+		list_del(p);
+		kfree(b);
+	}
+
+	return 0;
+}
+
+static int hci_blacklist_del(struct hci_dev *hdev, void __user *arg)
+{
+	bdaddr_t bdaddr;
+	struct bdaddr_list *entry;
+
+	if (copy_from_user(&bdaddr, arg, sizeof(bdaddr)))
+		return -EFAULT;
+
+	if (bacmp(&bdaddr, BDADDR_ANY) == 0)
+		return hci_blacklist_clear(hdev);
+
+	entry = hci_blacklist_lookup(hdev, &bdaddr);
+	if (!entry)
+		return -ENOENT;
+
+	list_del(&entry->list);
+	kfree(entry);
+
+	return 0;
+}
+
 /* Ioctls that require bound socket */
 static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg)
 {
@@ -194,6 +274,16 @@ static inline int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsign
 	case HCIGETAUTHINFO:
 		return hci_get_auth_info(hdev, (void __user *) arg);
 
+	case HCIBLOCKADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_blacklist_add(hdev, (void __user *) arg);
+
+	case HCIUNBLOCKADDR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EACCES;
+		return hci_blacklist_del(hdev, (void __user *) arg);
+
 	default:
 		if (hdev->ioctl)
 			return hdev->ioctl(hdev, cmd, arg);
-- 
cgit v1.2.3-58-ga151


From 32c2ece5eaec296482077dadc3edd5baa7bd1097 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@nokia.com>
Date: Tue, 18 May 2010 13:54:49 +0200
Subject: Bluetooth: Add debugfs support for showing the blacklist

This patch adds a debugfs blacklist entry for each HCI device which can
be used to list the current content of the blacklist.

Signed-off-by: Johan Hedberg <johan.hedberg@nokia.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_sysfs.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 463ffa4fe042..ce44c47eeac1 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -436,6 +436,41 @@ static const struct file_operations inquiry_cache_fops = {
 	.release	= single_release,
 };
 
+static int blacklist_show(struct seq_file *f, void *p)
+{
+	struct hci_dev *hdev = f->private;
+	struct bdaddr_list *blacklist = &hdev->blacklist;
+	struct list_head *l;
+
+	hci_dev_lock_bh(hdev);
+
+	list_for_each(l, &blacklist->list) {
+		struct bdaddr_list *b;
+		bdaddr_t bdaddr;
+
+		b = list_entry(l, struct bdaddr_list, list);
+
+		baswap(&bdaddr, &b->bdaddr);
+
+		seq_printf(f, "%s\n", batostr(&bdaddr));
+	}
+
+	hci_dev_unlock_bh(hdev);
+
+	return 0;
+}
+
+static int blacklist_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, blacklist_show, inode->i_private);
+}
+
+static const struct file_operations blacklist_fops = {
+	.open		= blacklist_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 int hci_register_sysfs(struct hci_dev *hdev)
 {
 	struct device *dev = &hdev->dev;
@@ -465,6 +500,9 @@ int hci_register_sysfs(struct hci_dev *hdev)
 	debugfs_create_file("inquiry_cache", 0444, hdev->debugfs,
 						hdev, &inquiry_cache_fops);
 
+	debugfs_create_file("blacklist", 0444, hdev->debugfs,
+						hdev, &blacklist_fops);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-58-ga151


From 2d0a03460a8a0c611843500735096ff799aa8510 Mon Sep 17 00:00:00 2001
From: Ron Shaffer <rshaffer@codeaurora.org>
Date: Fri, 28 May 2010 11:53:46 -0400
Subject: Bluetooth: Reassigned copyright to Code Aurora Forum

Qualcomm, Inc. has reassigned rights to Code Aurora Forum. Accordingly,
as files are modified by Code Aurora Forum members, the copyright
statement will be updated.

Signed-off-by: Ron Shaffer <rshaffer@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/hci_conn.c         | 2 +-
 net/bluetooth/hci_event.c        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ab6d6186a460..600372d44578 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 800b6b9fbbae..e9fef83449f8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 43feeef3c498..a96980022305 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1,6 +1,6 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
-   Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (c) 2000-2001, 2010, Code Aurora Forum. All rights reserved.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-58-ga151


From 7fe9b298c98fdfecf3b0efb4c971b7696d091ae9 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 12 May 2010 18:32:04 -0300
Subject: Bluetooth: Stop ack_timer if ERTM enters in Local Busy or SREJ_SENT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ack_timer is implemation specific, disabling it in such situation
avoids some potencial errors in the ERTM protocol.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c2fb26d9286c..40cf67c11d20 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3665,6 +3665,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
 
 	pi->conn_state |= L2CAP_CONN_RNR_SENT;
 
+	del_timer(&pi->ack_timer);
+
 	queue_work(_busy_wq, &pi->busy_work);
 
 	return err;
@@ -3914,6 +3916,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 		pi->conn_state |= L2CAP_CONN_SEND_PBIT;
 
 		l2cap_send_srejframe(sk, tx_seq);
+
+		del_timer(&pi->ack_timer);
 	}
 	return 0;
 
-- 
cgit v1.2.3-58-ga151


From 2ece3684b4037ad2394de795d67abbe412ab5e2f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 16 Jun 2010 17:21:44 -0300
Subject: Bluetooth: Update buffer_seq before retransmit frames

Updating buffer_seq first make us able to ack the last I-frame received.
This is also a requirement of the  Profile Tuning Suite software.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 40cf67c11d20..ecf3e3dcefe3 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3931,6 +3931,10 @@ expected:
 		return 0;
 	}
 
+	err = l2cap_push_rx_skb(sk, skb, rx_control);
+	if (err < 0)
+		return 0;
+
 	if (rx_control & L2CAP_CTRL_FINAL) {
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
@@ -3938,10 +3942,6 @@ expected:
 			l2cap_retransmit_frames(sk);
 	}
 
-	err = l2cap_push_rx_skb(sk, skb, rx_control);
-	if (err < 0)
-		return 0;
-
 	__mod_ack_timer();
 
 	pi->num_acked = (pi->num_acked + 1) % num_to_ack;
-- 
cgit v1.2.3-58-ga151


From 3cb123d1c03a0510d3c325bfaa971ce4df1e050d Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 29 May 2010 02:24:35 -0300
Subject: Bluetooth: Fix handle of received P-bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ERTM spec mandates that after receive a P-bit we shall send an F-bit in
response. This patch fixes this for retransmitted packets, on
retransmitting we were missing to check for a pending F-bit to be sent.
Also we were missing some annotation to send a F-bit.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ecf3e3dcefe3..f0441b0b0335 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1430,10 +1430,15 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 	tx_skb = skb_clone(skb, GFP_ATOMIC);
 	bt_cb(skb)->retries++;
 	control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
-	control &= L2CAP_CTRL_SAR;
+
+	if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
+		control |= L2CAP_CTRL_FINAL;
+		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+	}
 
 	control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
 			| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
+
 	put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
 
 	if (pi->fcs == L2CAP_FCS_CRC16) {
@@ -3385,7 +3390,6 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	u16 control = 0;
 
 	pi->frames_sent = 0;
-	pi->conn_state |= L2CAP_CONN_SEND_FBIT;
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
@@ -3963,6 +3967,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 	l2cap_drop_acked_frames(sk);
 
 	if (rx_control & L2CAP_CTRL_POLL) {
+		pi->conn_state |= L2CAP_CONN_SEND_FBIT;
 		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
 					(pi->unacked_frames > 0))
@@ -4030,6 +4035,8 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 	if (rx_control & L2CAP_CTRL_POLL) {
 		pi->expected_ack_seq = tx_seq;
 		l2cap_drop_acked_frames(sk);
+
+		pi->conn_state |= L2CAP_CONN_SEND_FBIT;
 		l2cap_retransmit_one_frame(sk, tx_seq);
 
 		spin_lock_bh(&pi->send_lock);
@@ -4064,6 +4071,9 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
 	pi->expected_ack_seq = tx_seq;
 	l2cap_drop_acked_frames(sk);
 
+	if (rx_control & L2CAP_CTRL_POLL)
+		pi->conn_state |= L2CAP_CONN_SEND_FBIT;
+
 	if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) {
 		del_timer(&pi->retrans_timer);
 		if (rx_control & L2CAP_CTRL_POLL)
-- 
cgit v1.2.3-58-ga151


From 45d65c46acc39945219eeb3752367ee80ed82799 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 7 Jun 2010 19:21:30 -0300
Subject: Bluetooth: Check the tx_window size on setsockopt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have to check if the proposed tx_window value is not greater that
maximum value supported.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f0441b0b0335..6094870d5d2a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1912,6 +1912,11 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 			break;
 		}
 
+		if (opts.txwin_size > L2CAP_DEFAULT_TX_WINDOW) {
+			err = -EINVAL;
+			break;
+		}
+
 		l2cap_pi(sk)->mode = opts.mode;
 		switch (l2cap_pi(sk)->mode) {
 		case L2CAP_MODE_BASIC:
-- 
cgit v1.2.3-58-ga151


From 260000896750690b774d4343294ae5cbff1423e5 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 11 May 2010 22:02:00 -0300
Subject: Bluetooth: Check packet FCS earlier

This way, if FCS is enabled and the packet is corrupted, we just drop it
without read it len, which could be corrupted.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6094870d5d2a..8c9f577dd46d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -4166,25 +4166,25 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		skb_pull(skb, 2);
 		len = skb->len;
 
+		/*
+		 * We can just drop the corrupted I-frame here.
+		 * Receiver will miss it and start proper recovery
+		 * procedures and ask retransmission.
+		 */
+		if (l2cap_check_fcs(pi, skb))
+			goto drop;
+
 		if (__is_sar_start(control) && __is_iframe(control))
 			len -= 2;
 
 		if (pi->fcs == L2CAP_FCS_CRC16)
 			len -= 2;
 
-		/*
-		 * We can just drop the corrupted I-frame here.
-		 * Receiver will miss it and start proper recovery
-		 * procedures and ask retransmission.
-		 */
 		if (len > pi->mps) {
 			l2cap_send_disconn_req(pi->conn, sk);
 			goto drop;
 		}
 
-		if (l2cap_check_fcs(pi, skb))
-			goto drop;
-
 		req_seq = __get_reqseq(control);
 		req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
 		if (req_seq_offset < 0)
@@ -4224,6 +4224,9 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		skb_pull(skb, 2);
 		len = skb->len;
 
+		if (l2cap_check_fcs(pi, skb))
+			goto drop;
+
 		if (__is_sar_start(control))
 			len -= 2;
 
@@ -4233,9 +4236,6 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (len > pi->mps || len < 0 || __is_sframe(control))
 			goto drop;
 
-		if (l2cap_check_fcs(pi, skb))
-			goto drop;
-
 		tx_seq = __get_txseq(control);
 
 		if (pi->expected_tx_seq == tx_seq)
-- 
cgit v1.2.3-58-ga151


From 4ea727ef9d507413f15da0de401d8a50b125649a Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Thu, 3 Jun 2010 16:34:20 -0300
Subject: Bluetooth: Fix missing retransmission action with RR(P=1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Bluetooth SIG Profile Tuning Suite Software uses the CSA1 spec
to run the L2CAP tests. The new 3.0 spec has a missing
Retransmit-I-Frames action when the Remote side is Busy.
We still start the retransmission timer if Remote is Busy and unacked
frames > 0. We do everything we did before this change plus the
Retransmission of I-frames.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 8c9f577dd46d..7c2273732909 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3405,8 +3405,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
 	}
 
-	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0)
-		__mod_retrans_timer();
+	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
+		l2cap_retransmit_frames(sk);
 
 	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
-- 
cgit v1.2.3-58-ga151


From 9b108fc0cf4e79c34a7d5626f5c2c4c529ef6d3f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Thu, 20 May 2010 16:21:53 -0300
Subject: Bluetooth: Fix ERTM error reporting to the userspace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If any error occurs during transfers we have to tell userspace that
something wrong happened.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7c2273732909..1e39d72b80af 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -456,7 +456,7 @@ static void l2cap_do_start(struct sock *sk)
 	}
 }
 
-static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk)
+static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
 {
 	struct l2cap_disconn_req req;
 
@@ -477,6 +477,7 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk)
 			L2CAP_DISCONN_REQ, sizeof(req), &req);
 
 	sk->sk_state = BT_DISCONN;
+	sk->sk_err = err;
 }
 
 /* ---- L2CAP connections ---- */
@@ -770,7 +771,7 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 
 			l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
-			l2cap_send_disconn_req(conn, sk);
+			l2cap_send_disconn_req(conn, sk, reason);
 		} else
 			l2cap_chan_del(sk, reason);
 		break;
@@ -1315,7 +1316,7 @@ static void l2cap_monitor_timeout(unsigned long arg)
 
 	bh_lock_sock(sk);
 	if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
-		l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk);
+		l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED);
 		bh_unlock_sock(sk);
 		return;
 	}
@@ -1423,7 +1424,7 @@ static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 
 	if (pi->remote_max_tx &&
 			bt_cb(skb)->retries == pi->remote_max_tx) {
-		l2cap_send_disconn_req(pi->conn, sk);
+		l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
 		return;
 	}
 
@@ -1463,7 +1464,7 @@ static int l2cap_ertm_send(struct sock *sk)
 
 		if (pi->remote_max_tx &&
 				bt_cb(skb)->retries == pi->remote_max_tx) {
-			l2cap_send_disconn_req(pi->conn, sk);
+			l2cap_send_disconn_req(pi->conn, sk, ECONNABORTED);
 			break;
 		}
 
@@ -2191,6 +2192,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 			err = bt_sock_wait_state(sk, BT_CLOSED,
 							sk->sk_lingertime);
 	}
+
+	if (!err && sk->sk_err)
+		err = -sk->sk_err;
+
 	release_sock(sk);
 	return err;
 }
@@ -2462,7 +2467,7 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	case L2CAP_MODE_ERTM:
 		pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
 		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
-			l2cap_send_disconn_req(pi->conn, sk);
+			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 		break;
 	default:
 		pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
@@ -3030,7 +3035,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	/* Complete config. */
 	len = l2cap_parse_conf_req(sk, rsp);
 	if (len < 0) {
-		l2cap_send_disconn_req(conn, sk);
+		l2cap_send_disconn_req(conn, sk, ECONNRESET);
 		goto unlock;
 	}
 
@@ -3100,7 +3105,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 			char req[64];
 
 			if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) {
-				l2cap_send_disconn_req(conn, sk);
+				l2cap_send_disconn_req(conn, sk, ECONNRESET);
 				goto done;
 			}
 
@@ -3109,7 +3114,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 			len = l2cap_parse_conf_rsp(sk, rsp->data,
 							len, req, &result);
 			if (len < 0) {
-				l2cap_send_disconn_req(conn, sk);
+				l2cap_send_disconn_req(conn, sk, ECONNRESET);
 				goto done;
 			}
 
@@ -3124,7 +3129,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	default:
 		sk->sk_err = ECONNRESET;
 		l2cap_sock_set_timer(sk, HZ * 5);
-		l2cap_send_disconn_req(conn, sk);
+		l2cap_send_disconn_req(conn, sk, ECONNRESET);
 		goto done;
 	}
 
@@ -3565,7 +3570,7 @@ drop:
 	pi->sdu = NULL;
 
 disconnect:
-	l2cap_send_disconn_req(pi->conn, sk);
+	l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 	kfree_skb(skb);
 	return 0;
 }
@@ -3588,7 +3593,7 @@ static void l2cap_busy_work(struct work_struct *work)
 
 		if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
 			err = -EBUSY;
-			l2cap_send_disconn_req(pi->conn, sk);
+			l2cap_send_disconn_req(pi->conn, sk, EBUSY);
 			goto done;
 		}
 
@@ -3864,7 +3869,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 
 	/* invalid tx_seq */
 	if (tx_seq_offset >= pi->tx_win) {
-		l2cap_send_disconn_req(pi->conn, sk);
+		l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 		goto drop;
 	}
 
@@ -4181,7 +4186,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 			len -= 2;
 
 		if (len > pi->mps) {
-			l2cap_send_disconn_req(pi->conn, sk);
+			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 			goto drop;
 		}
 
@@ -4197,20 +4202,20 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 
 		/* check for invalid req-seq */
 		if (req_seq_offset > next_tx_seq_offset) {
-			l2cap_send_disconn_req(pi->conn, sk);
+			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 			goto drop;
 		}
 
 		if (__is_iframe(control)) {
 			if (len < 0) {
-				l2cap_send_disconn_req(pi->conn, sk);
+				l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 				goto drop;
 			}
 
 			l2cap_data_channel_iframe(sk, control, skb);
 		} else {
 			if (len != 0) {
-				l2cap_send_disconn_req(pi->conn, sk);
+				l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 				goto drop;
 			}
 
-- 
cgit v1.2.3-58-ga151


From 0e98958d4f827f814444757e0376546b462dfe6f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 19 Apr 2010 14:45:38 -0300
Subject: Bluetooth: Add debug output to ERTM code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the dynamic debug to output info about ERTM protocol stuff.
The following script can be used to enable debug for ERTM:

DEBUGFS="/sys/kernel/debug/dynamic_debug/control"

echo -n 'func l2cap_send_disconn_req +p' > $DEBUGFS
echo -n 'func l2cap_monitor_timeout +p' > $DEBUGFS
echo -n 'func l2cap_retrans_timeout +p' > $DEBUGFS
echo -n 'func l2cap_busy_work  +p' > $DEBUGFS
echo -n 'func l2cap_push_rx_skb +p' > $DEBUGFS
echo -n 'func l2cap_data_channel_iframe +p' > $DEBUGFS
echo -n 'func l2cap_data_channel_rrframe +p' > $DEBUGFS
echo -n 'func l2cap_data_channel_rejframe +p' > $DEBUGFS
echo -n 'func l2cap_data_channel_srejframe +p' > $DEBUGFS
echo -n 'func l2cap_data_channel_rnrframe +p' > $DEBUGFS

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 1e39d72b80af..6e8a0512a9c9 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1314,6 +1314,8 @@ static void l2cap_monitor_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
 
+	BT_DBG("sk %p", sk);
+
 	bh_lock_sock(sk);
 	if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
 		l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk, ECONNABORTED);
@@ -1332,6 +1334,8 @@ static void l2cap_retrans_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
 
+	BT_DBG("sk %p", sk);
+
 	bh_lock_sock(sk);
 	l2cap_pi(sk)->retry_count = 1;
 	__mod_monitor_timer();
@@ -3645,6 +3649,8 @@ done:
 	pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
 	pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
 
+	BT_DBG("sk %p, Exit local busy", sk);
+
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk_sleep(sk), &wait);
 
@@ -3669,6 +3675,8 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
 	}
 
 	/* Busy Condition */
+	BT_DBG("sk %p, Enter local busy", sk);
+
 	pi->conn_state |= L2CAP_CONN_LOCAL_BUSY;
 	bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
 	__skb_queue_tail(BUSY_QUEUE(sk), skb);
@@ -3847,7 +3855,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 	int num_to_ack = (pi->tx_win/6) + 1;
 	int err = 0;
 
-	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
+	BT_DBG("sk %p len %d tx_seq %d rx_control 0x%4.4x", sk, skb->len, tx_seq,
+								rx_control);
 
 	if (L2CAP_CTRL_FINAL & rx_control &&
 			l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
@@ -3892,6 +3901,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 				pi->buffer_seq = pi->buffer_seq_srej;
 				pi->conn_state &= ~L2CAP_CONN_SREJ_SENT;
 				l2cap_send_ack(pi);
+				BT_DBG("sk %p, Exit SREJ_SENT", sk);
 			}
 		} else {
 			struct srej_list *l;
@@ -3920,6 +3930,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 
 		pi->conn_state |= L2CAP_CONN_SREJ_SENT;
 
+		BT_DBG("sk %p, Enter SREJ", sk);
+
 		INIT_LIST_HEAD(SREJ_LIST(sk));
 		pi->buffer_seq_srej = pi->buffer_seq;
 
@@ -3973,6 +3985,9 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 
+	BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, __get_reqseq(rx_control),
+						rx_control);
+
 	pi->expected_ack_seq = __get_reqseq(rx_control);
 	l2cap_drop_acked_frames(sk);
 
@@ -4018,6 +4033,8 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u8 tx_seq = __get_reqseq(rx_control);
 
+	BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
+
 	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
 	pi->expected_ack_seq = tx_seq;
@@ -4040,6 +4057,8 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u8 tx_seq = __get_reqseq(rx_control);
 
+	BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
+
 	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
 	if (rx_control & L2CAP_CTRL_POLL) {
@@ -4077,6 +4096,8 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u8 tx_seq = __get_reqseq(rx_control);
 
+	BT_DBG("sk %p, req_seq %d ctrl 0x%4.4x", sk, tx_seq, rx_control);
+
 	pi->conn_state |= L2CAP_CONN_REMOTE_BUSY;
 	pi->expected_ack_seq = tx_seq;
 	l2cap_drop_acked_frames(sk);
-- 
cgit v1.2.3-58-ga151


From 64988868637304330f7df20d08b965592312c531 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 10 May 2010 14:54:14 -0300
Subject: Bluetooth: Tweaks to l2cap_send_i_or_rr_or_rnr() flow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

l2cap_send_sframe() already set the F-bit if we set L2CAP_CONN_SEND_FBIT
and unset L2CAP_CONN_SEND_FBIT after send the F-bit.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6e8a0512a9c9..65c6a989449a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3408,10 +3408,9 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
 	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
-		control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL;
+		control |= L2CAP_SUPER_RCV_NOT_READY;
 		l2cap_send_sframe(pi, control);
 		pi->conn_state |= L2CAP_CONN_RNR_SENT;
-		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
 	}
 
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
-- 
cgit v1.2.3-58-ga151


From 85eb53c6f719523dde9e0658823dffd2664d4d1c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Thu, 3 Jun 2010 18:43:28 -0300
Subject: Bluetooth: Change the way we set ERTM mode as mandatory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the socket type is SOCK_STREAM we set Enhanced Retransmisson Mode or
Streaming Mode as mandatory. That means that we will close the channel
if the other side doesn't support or request the the mandatory mode.
Basic mode can't be set as mandatory.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 65c6a989449a..e78a7504d09c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -831,6 +831,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 
 		pi->imtu = l2cap_pi(parent)->imtu;
 		pi->omtu = l2cap_pi(parent)->omtu;
+		pi->conf_state = l2cap_pi(parent)->conf_state;
 		pi->mode = l2cap_pi(parent)->mode;
 		pi->fcs  = l2cap_pi(parent)->fcs;
 		pi->max_tx = l2cap_pi(parent)->max_tx;
@@ -841,10 +842,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 	} else {
 		pi->imtu = L2CAP_DEFAULT_MTU;
 		pi->omtu = 0;
-		if (enable_ertm && sk->sk_type == SOCK_STREAM)
+		if (enable_ertm && sk->sk_type == SOCK_STREAM) {
 			pi->mode = L2CAP_MODE_ERTM;
-		else
+			pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
+		} else {
 			pi->mode = L2CAP_MODE_BASIC;
+		}
 		pi->max_tx = L2CAP_DEFAULT_MAX_TX;
 		pi->fcs  = L2CAP_FCS_CRC16;
 		pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
@@ -1925,6 +1928,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 		l2cap_pi(sk)->mode = opts.mode;
 		switch (l2cap_pi(sk)->mode) {
 		case L2CAP_MODE_BASIC:
+			l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_STATE2_DEVICE;
 			break;
 		case L2CAP_MODE_ERTM:
 		case L2CAP_MODE_STREAMING:
@@ -2469,7 +2473,12 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	switch (pi->mode) {
 	case L2CAP_MODE_STREAMING:
 	case L2CAP_MODE_ERTM:
-		pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
+		if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
+			pi->mode = l2cap_select_mode(rfc.mode,
+					pi->conn->feat_mask);
+			break;
+		}
+
 		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
 			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
 		break;
@@ -2602,7 +2611,12 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 	switch (pi->mode) {
 	case L2CAP_MODE_STREAMING:
 	case L2CAP_MODE_ERTM:
-		pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
+		if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
+			pi->mode = l2cap_select_mode(rfc.mode,
+					pi->conn->feat_mask);
+			break;
+		}
+
 		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
 			return -ECONNREFUSED;
 		break;
-- 
cgit v1.2.3-58-ga151


From 742e519b0db4a470008118b48f0baea6126e2122 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 8 Jun 2010 19:09:48 -0300
Subject: Bluetooth: Disconnect the channel if we don't want the proposed mode

If the device is a STATE 2 then it should disconnect the channel if the
remote device propose a mode different from its mandatory mode.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e78a7504d09c..03e9125dd74f 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2617,8 +2617,9 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 			break;
 		}
 
-		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
+		if (pi->mode != rfc.mode)
 			return -ECONNREFUSED;
+
 		break;
 	default:
 		pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
-- 
cgit v1.2.3-58-ga151


From ae12d52efd492ee6634c34322302ac754ff8cde8 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 8 Jun 2010 19:29:00 -0300
Subject: Bluetooth: Prefer Basic Mode on receipt of ConfigReq

If we choose to use Basic Mode then we have to refuse the received mode
and propose Basic Mode again.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 03e9125dd74f..ceed5df43fee 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2621,9 +2621,6 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 			return -ECONNREFUSED;
 
 		break;
-	default:
-		pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
-		break;
 	}
 
 done:
-- 
cgit v1.2.3-58-ga151


From 625477523b4e656fbcc5ec2a8ca7a1beb39b1caf Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 8 Jun 2010 20:05:31 -0300
Subject: Bluetooth: Actively send request for Basic Mode

The Profile Tuning Suite requires that we send a RFC containing the
Basic Mode configuration when requesting Basic Mode.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ceed5df43fee..ca9bab225777 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2492,6 +2492,14 @@ done:
 	case L2CAP_MODE_BASIC:
 		if (pi->imtu != L2CAP_DEFAULT_MTU)
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->imtu);
+
+		rfc.mode            = L2CAP_MODE_BASIC;
+		rfc.txwin_size      = 0;
+		rfc.max_transmit    = 0;
+		rfc.retrans_timeout = 0;
+		rfc.monitor_timeout = 0;
+		rfc.max_pdu_size    = 0;
+
 		break;
 
 	case L2CAP_MODE_ERTM:
@@ -2504,9 +2512,6 @@ done:
 		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
 			rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
 
-		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					sizeof(rfc), (unsigned long) &rfc);
-
 		if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
 			break;
 
@@ -2527,9 +2532,6 @@ done:
 		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
 			rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
 
-		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
-					sizeof(rfc), (unsigned long) &rfc);
-
 		if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS))
 			break;
 
@@ -2541,6 +2543,9 @@ done:
 		break;
 	}
 
+	l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc),
+						(unsigned long) &rfc);
+
 	/* FIXME: Need actual value of the flush timeout */
 	//if (flush_to != L2CAP_DEFAULT_FLUSH_TO)
 	//   l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, 2, pi->flush_to);
-- 
cgit v1.2.3-58-ga151


From 6c2ea7a8f5fea67fa20e5825401b8fce5a78dbf6 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 8 Jun 2010 20:08:49 -0300
Subject: Bluetooth: Refuse ConfigRsp with different mode

If our mode is Basic Mode we have to refuse any ConfigRsp that proposes
a different mode.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ca9bab225777..c5904082392c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2747,7 +2747,6 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
 							rfc.mode != pi->mode)
 				return -ECONNREFUSED;
 
-			pi->mode = rfc.mode;
 			pi->fcs = 0;
 
 			l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
@@ -2756,6 +2755,11 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
 		}
 	}
 
+	if (pi->mode == L2CAP_MODE_BASIC && pi->mode != rfc.mode)
+		return -ECONNREFUSED;
+
+	pi->mode = rfc.mode;
+
 	if (*result == L2CAP_CONF_SUCCESS) {
 		switch (rfc.mode) {
 		case L2CAP_MODE_ERTM:
-- 
cgit v1.2.3-58-ga151


From 2ba13ed678775195e8255b4e503c59d48b615bd8 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 9 Jun 2010 16:39:05 -0300
Subject: Bluetooth: Remove check for supported mode

Since now we have checks for the supported mode before on
l2cap_info_rsp we can remove the check for it here.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c5904082392c..2fb45c481762 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2473,15 +2473,10 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 	switch (pi->mode) {
 	case L2CAP_MODE_STREAMING:
 	case L2CAP_MODE_ERTM:
-		if (!(pi->conf_state & L2CAP_CONF_STATE2_DEVICE)) {
-			pi->mode = l2cap_select_mode(rfc.mode,
-					pi->conn->feat_mask);
+		if (pi->conf_state & L2CAP_CONF_STATE2_DEVICE)
 			break;
-		}
 
-		if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask))
-			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
-		break;
+		/* fall through */
 	default:
 		pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask);
 		break;
-- 
cgit v1.2.3-58-ga151


From cf6c2c0b9f47ee3cd12684b905725c8376d52135 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 7 Jun 2010 20:54:45 -0300
Subject: Bluetooth: Disconnect early if mode is not supported

When mode is mandatory we shall not send connect request and report this
to the userspace as well.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  5 ++++
 net/bluetooth/l2cap.c         | 56 ++++++++++++++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 7c695bfd853c..f8bae5415438 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -287,6 +287,11 @@ struct l2cap_conn {
 	struct l2cap_chan_list chan_list;
 };
 
+struct sock_del_list {
+	struct sock *sk;
+	struct list_head list;
+};
+
 #define L2CAP_INFO_CL_MTU_REQ_SENT	0x01
 #define L2CAP_INFO_FEAT_MASK_REQ_SENT	0x04
 #define L2CAP_INFO_FEAT_MASK_REQ_DONE	0x08
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2fb45c481762..6a33d269389e 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -456,6 +456,22 @@ static void l2cap_do_start(struct sock *sk)
 	}
 }
 
+static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
+{
+	u32 local_feat_mask = l2cap_feat_mask;
+	if (enable_ertm)
+		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
+
+	switch (mode) {
+	case L2CAP_MODE_ERTM:
+		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
+	case L2CAP_MODE_STREAMING:
+		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
+	default:
+		return 0x00;
+	}
+}
+
 static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int err)
 {
 	struct l2cap_disconn_req req;
@@ -484,10 +500,13 @@ static void l2cap_send_disconn_req(struct l2cap_conn *conn, struct sock *sk, int
 static void l2cap_conn_start(struct l2cap_conn *conn)
 {
 	struct l2cap_chan_list *l = &conn->chan_list;
+	struct sock_del_list del, *tmp1, *tmp2;
 	struct sock *sk;
 
 	BT_DBG("conn %p", conn);
 
+	INIT_LIST_HEAD(&del.list);
+
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
@@ -503,6 +522,19 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 			if (l2cap_check_security(sk) &&
 					__l2cap_no_conn_pending(sk)) {
 				struct l2cap_conn_req req;
+
+				if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
+						conn->feat_mask)
+						&& l2cap_pi(sk)->conf_state &
+						L2CAP_CONF_STATE2_DEVICE) {
+					tmp1 = kzalloc(sizeof(struct srej_list),
+							GFP_ATOMIC);
+					tmp1->sk = sk;
+					list_add_tail(&tmp1->list, &del.list);
+					bh_unlock_sock(sk);
+					continue;
+				}
+
 				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 				req.psm  = l2cap_pi(sk)->psm;
 
@@ -542,6 +574,14 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 	}
 
 	read_unlock(&l->lock);
+
+	list_for_each_entry_safe(tmp1, tmp2, &del.list, list) {
+		bh_lock_sock(tmp1->sk);
+		__l2cap_sock_close(tmp1->sk, ECONNRESET);
+		bh_unlock_sock(tmp1->sk);
+		list_del(&tmp1->list);
+		kfree(tmp1);
+	}
 }
 
 static void l2cap_conn_ready(struct l2cap_conn *conn)
@@ -2429,22 +2469,6 @@ static inline void l2cap_ertm_init(struct sock *sk)
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
 
-static int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
-{
-	u32 local_feat_mask = l2cap_feat_mask;
-	if (enable_ertm)
-		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
-
-	switch (mode) {
-	case L2CAP_MODE_ERTM:
-		return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask;
-	case L2CAP_MODE_STREAMING:
-		return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask;
-	default:
-		return 0x00;
-	}
-}
-
 static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
 {
 	switch (mode) {
-- 
cgit v1.2.3-58-ga151


From 8cb8e6f1684be13b51f8429b15f39c140326b327 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 14 Jun 2010 02:26:15 -0300
Subject: Bluetooth: Don't accept ConfigReq if we aren't in the BT_CONFIG state

If such event happens we shall reply with a Command Reject, because we are
not expecting any configure request.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6a33d269389e..f6e46fdddd2b 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3052,8 +3052,14 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	if (!sk)
 		return -ENOENT;
 
-	if (sk->sk_state == BT_DISCONN)
+	if (sk->sk_state != BT_CONFIG) {
+		struct l2cap_cmd_rej rej;
+
+		rej.reason = cpu_to_le16(0x0002);
+		l2cap_send_cmd(conn, cmd->ident, L2CAP_COMMAND_REJ,
+				sizeof(rej), &rej);
 		goto unlock;
+	}
 
 	/* Reject if config buffer is too small. */
 	len = cmd_len - sizeof(*req);
-- 
cgit v1.2.3-58-ga151


From e0f66218b3a7d0bcf37ca95186123c257fda0ba5 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 21 Jun 2010 18:50:49 -0300
Subject: Bluetooth: Remove the send_lock spinlock from ERTM

Using a lock to deal with the ERTM race condition - interruption with
new data from the hci layer - is wrong. We should use the native skb
backlog queue.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  1 -
 net/bluetooth/l2cap.c         | 28 +---------------------------
 2 files changed, 1 insertion(+), 28 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index f8bae5415438..7df70e47c878 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -358,7 +358,6 @@ struct l2cap_pinfo {
 
 	__le16		sport;
 
-	spinlock_t		send_lock;
 	struct timer_list	retrans_timer;
 	struct timer_list	monitor_timer;
 	struct timer_list	ack_timer;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f6e46fdddd2b..dc8601fc2404 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1562,16 +1562,11 @@ static int l2cap_retransmit_frames(struct sock *sk)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	int ret;
 
-	spin_lock_bh(&pi->send_lock);
-
 	if (!skb_queue_empty(TX_QUEUE(sk)))
 		sk->sk_send_head = TX_QUEUE(sk)->next;
 
 	pi->next_tx_seq = pi->expected_ack_seq;
 	ret = l2cap_ertm_send(sk);
-
-	spin_unlock_bh(&pi->send_lock);
-
 	return ret;
 }
 
@@ -1579,7 +1574,6 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 {
 	struct sock *sk = (struct sock *)pi;
 	u16 control = 0;
-	int nframes;
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
@@ -1590,11 +1584,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 		return;
 	}
 
-	spin_lock_bh(&pi->send_lock);
-	nframes = l2cap_ertm_send(sk);
-	spin_unlock_bh(&pi->send_lock);
-
-	if (nframes > 0)
+	if (l2cap_ertm_send(sk) > 0)
 		return;
 
 	control |= L2CAP_SUPER_RCV_READY;
@@ -1789,10 +1779,8 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		size += buflen;
 	}
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
-	spin_lock_bh(&pi->send_lock);
 	if (sk->sk_send_head == NULL)
 		sk->sk_send_head = sar_queue.next;
-	spin_unlock_bh(&pi->send_lock);
 
 	return size;
 }
@@ -1864,14 +1852,9 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 			}
 			__skb_queue_tail(TX_QUEUE(sk), skb);
 
-			if (pi->mode == L2CAP_MODE_ERTM)
-				spin_lock_bh(&pi->send_lock);
-
 			if (sk->sk_send_head == NULL)
 				sk->sk_send_head = skb;
 
-			if (pi->mode == L2CAP_MODE_ERTM)
-				spin_unlock_bh(&pi->send_lock);
 		} else {
 		/* Segment SDU into multiples PDUs */
 			err = l2cap_sar_segment_sdu(sk, msg, len);
@@ -1887,9 +1870,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				err = len;
 				break;
 			}
-			spin_lock_bh(&pi->send_lock);
 			err = l2cap_ertm_send(sk);
-			spin_unlock_bh(&pi->send_lock);
 		}
 
 		if (err >= 0)
@@ -2464,7 +2445,6 @@ static inline void l2cap_ertm_init(struct sock *sk)
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
 	__skb_queue_head_init(BUSY_QUEUE(sk));
-	spin_lock_init(&l2cap_pi(sk)->send_lock);
 
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
@@ -3462,9 +3442,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY)
 		l2cap_retransmit_frames(sk);
 
-	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
-	spin_unlock_bh(&pi->send_lock);
 
 	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
 			pi->frames_sent == 0) {
@@ -4066,9 +4044,7 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 			l2cap_send_ack(pi);
 		} else {
-			spin_lock_bh(&pi->send_lock);
 			l2cap_ertm_send(sk);
-			spin_unlock_bh(&pi->send_lock);
 		}
 	}
 }
@@ -4113,9 +4089,7 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 		pi->conn_state |= L2CAP_CONN_SEND_FBIT;
 		l2cap_retransmit_one_frame(sk, tx_seq);
 
-		spin_lock_bh(&pi->send_lock);
 		l2cap_ertm_send(sk);
-		spin_unlock_bh(&pi->send_lock);
 
 		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
 			pi->srej_save_reqseq = tx_seq;
-- 
cgit v1.2.3-58-ga151


From 218bb9dfd21472128f86b38ad2eab123205c2991 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 21 Jun 2010 18:53:22 -0300
Subject: Bluetooth: Add backlog queue to ERTM code

backlog queue is the canonical mechanism to avoid race conditions due
interrupts in bottom half context. After the socket lock is released the
net core take care of push all skb in its backlog queue.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 132 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 79 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index dc8601fc2404..cf4481f7f566 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -77,6 +77,8 @@ static void l2cap_sock_kill(struct sock *sk);
 static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 				u8 code, u8 ident, u16 dlen, void *data);
 
+static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb);
+
 /* ---- L2CAP timers ---- */
 static void l2cap_sock_timeout(unsigned long arg)
 {
@@ -2447,6 +2449,8 @@ static inline void l2cap_ertm_init(struct sock *sk)
 	__skb_queue_head_init(BUSY_QUEUE(sk));
 
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
+
+	sk->sk_backlog_rcv = l2cap_ertm_data_rcv;
 }
 
 static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask)
@@ -4171,13 +4175,83 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 	return 0;
 }
 
+static int l2cap_ertm_data_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	u16 control;
+	u8 req_seq;
+	int len, next_tx_seq_offset, req_seq_offset;
+
+	control = get_unaligned_le16(skb->data);
+	skb_pull(skb, 2);
+	len = skb->len;
+
+	/*
+	 * We can just drop the corrupted I-frame here.
+	 * Receiver will miss it and start proper recovery
+	 * procedures and ask retransmission.
+	 */
+	if (l2cap_check_fcs(pi, skb))
+		goto drop;
+
+	if (__is_sar_start(control) && __is_iframe(control))
+		len -= 2;
+
+	if (pi->fcs == L2CAP_FCS_CRC16)
+		len -= 2;
+
+	if (len > pi->mps) {
+		l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
+		goto drop;
+	}
+
+	req_seq = __get_reqseq(control);
+	req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
+	if (req_seq_offset < 0)
+		req_seq_offset += 64;
+
+	next_tx_seq_offset =
+		(pi->next_tx_seq - pi->expected_ack_seq) % 64;
+	if (next_tx_seq_offset < 0)
+		next_tx_seq_offset += 64;
+
+	/* check for invalid req-seq */
+	if (req_seq_offset > next_tx_seq_offset) {
+		l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
+		goto drop;
+	}
+
+	if (__is_iframe(control)) {
+		if (len < 0) {
+			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
+			goto drop;
+		}
+
+		l2cap_data_channel_iframe(sk, control, skb);
+	} else {
+		if (len != 0) {
+			BT_ERR("%d", len);
+			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
+			goto drop;
+		}
+
+		l2cap_data_channel_sframe(sk, control, skb);
+	}
+
+	return 0;
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
 static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb)
 {
 	struct sock *sk;
 	struct l2cap_pinfo *pi;
 	u16 control;
-	u8 tx_seq, req_seq;
-	int len, next_tx_seq_offset, req_seq_offset;
+	u8 tx_seq;
+	int len;
 
 	sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
 	if (!sk) {
@@ -4207,59 +4281,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		break;
 
 	case L2CAP_MODE_ERTM:
-		control = get_unaligned_le16(skb->data);
-		skb_pull(skb, 2);
-		len = skb->len;
-
-		/*
-		 * We can just drop the corrupted I-frame here.
-		 * Receiver will miss it and start proper recovery
-		 * procedures and ask retransmission.
-		 */
-		if (l2cap_check_fcs(pi, skb))
-			goto drop;
-
-		if (__is_sar_start(control) && __is_iframe(control))
-			len -= 2;
-
-		if (pi->fcs == L2CAP_FCS_CRC16)
-			len -= 2;
-
-		if (len > pi->mps) {
-			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
-			goto drop;
-		}
-
-		req_seq = __get_reqseq(control);
-		req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
-		if (req_seq_offset < 0)
-			req_seq_offset += 64;
-
-		next_tx_seq_offset =
-			(pi->next_tx_seq - pi->expected_ack_seq) % 64;
-		if (next_tx_seq_offset < 0)
-			next_tx_seq_offset += 64;
-
-		/* check for invalid req-seq */
-		if (req_seq_offset > next_tx_seq_offset) {
-			l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
-			goto drop;
-		}
-
-		if (__is_iframe(control)) {
-			if (len < 0) {
-				l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
-				goto drop;
-			}
-
-			l2cap_data_channel_iframe(sk, control, skb);
+		if (!sock_owned_by_user(sk)) {
+			l2cap_ertm_data_rcv(sk, skb);
 		} else {
-			if (len != 0) {
-				l2cap_send_disconn_req(pi->conn, sk, ECONNRESET);
+			if (sk_add_backlog(sk, skb))
 				goto drop;
-			}
-
-			l2cap_data_channel_sframe(sk, control, skb);
 		}
 
 		goto done;
-- 
cgit v1.2.3-58-ga151


From 712132eb541e4a76afad97898dc0ce6b6c0032d8 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Mon, 21 Jun 2010 19:39:50 -0300
Subject: Bluetooth: Improve ERTM local busy handling

Now we also check if can push skb userspace just after receive a new
skb instead of only wait the l2cap_busy_work wake up from time to time
to check the local busy condition.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 83 +++++++++++++++++++++++++++++----------------------
 1 file changed, 47 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cf4481f7f566..6b839d682143 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3606,6 +3606,46 @@ disconnect:
 	return 0;
 }
 
+static int l2cap_try_push_rx_skb(struct sock *sk)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sk_buff *skb;
+	u16 control;
+	int err;
+
+	while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
+		control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
+		err = l2cap_ertm_reassembly_sdu(sk, skb, control);
+		if (err < 0) {
+			skb_queue_head(BUSY_QUEUE(sk), skb);
+			return -EBUSY;
+		}
+
+		pi->buffer_seq = (pi->buffer_seq + 1) % 64;
+	}
+
+	if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
+		goto done;
+
+	control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+	control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
+	l2cap_send_sframe(pi, control);
+	l2cap_pi(sk)->retry_count = 1;
+
+	del_timer(&pi->retrans_timer);
+	__mod_monitor_timer();
+
+	l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
+
+done:
+	pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
+	pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
+
+	BT_DBG("sk %p, Exit local busy", sk);
+
+	return 0;
+}
+
 static void l2cap_busy_work(struct work_struct *work)
 {
 	DECLARE_WAITQUEUE(wait, current);
@@ -3614,7 +3654,6 @@ static void l2cap_busy_work(struct work_struct *work)
 	struct sock *sk = (struct sock *)pi;
 	int n_tries = 0, timeo = HZ/5, err;
 	struct sk_buff *skb;
-	u16 control;
 
 	lock_sock(sk);
 
@@ -3625,7 +3664,7 @@ static void l2cap_busy_work(struct work_struct *work)
 		if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
 			err = -EBUSY;
 			l2cap_send_disconn_req(pi->conn, sk, EBUSY);
-			goto done;
+			break;
 		}
 
 		if (!timeo)
@@ -3633,7 +3672,7 @@ static void l2cap_busy_work(struct work_struct *work)
 
 		if (signal_pending(current)) {
 			err = sock_intr_errno(timeo);
-			goto done;
+			break;
 		}
 
 		release_sock(sk);
@@ -3642,42 +3681,12 @@ static void l2cap_busy_work(struct work_struct *work)
 
 		err = sock_error(sk);
 		if (err)
-			goto done;
-
-		while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
-			control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
-			err = l2cap_ertm_reassembly_sdu(sk, skb, control);
-			if (err < 0) {
-				skb_queue_head(BUSY_QUEUE(sk), skb);
-				break;
-			}
-
-			pi->buffer_seq = (pi->buffer_seq + 1) % 64;
-		}
+			break;
 
-		if (!skb)
+		if (l2cap_try_push_rx_skb(sk) == 0)
 			break;
 	}
 
-	if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
-		goto done;
-
-	control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
-	control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
-	l2cap_send_sframe(pi, control);
-	l2cap_pi(sk)->retry_count = 1;
-
-	del_timer(&pi->retrans_timer);
-	__mod_monitor_timer();
-
-	l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
-
-done:
-	pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
-	pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
-
-	BT_DBG("sk %p, Exit local busy", sk);
-
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(sk_sleep(sk), &wait);
 
@@ -3692,7 +3701,9 @@ static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
 	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
 		bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
 		__skb_queue_tail(BUSY_QUEUE(sk), skb);
-		return -EBUSY;
+		return l2cap_try_push_rx_skb(sk);
+
+
 	}
 
 	err = l2cap_ertm_reassembly_sdu(sk, skb, control);
-- 
cgit v1.2.3-58-ga151


From 8b0dc6dc827fb71efd6c9dfb5c4172b7b5c96cee Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:22 -0300
Subject: Bluetooth: Fix l2cap_sock_connect error return.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Return a proper error value if socket is already connected.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6b839d682143..e322be8ff948 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1158,6 +1158,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
 
 	case BT_CONNECTED:
 		/* Already connected */
+		err = -EISCONN;
 		goto done;
 
 	case BT_OPEN:
-- 
cgit v1.2.3-58-ga151


From 305682e8377b0f560d4b885c169a72e6a62331e9 Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:23 -0300
Subject: Bluetooth: Make l2cap_streaming_send() void.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It doesn't make sense to have a return value since we always set it
to 0.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e322be8ff948..33e134b0d402 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1420,7 +1420,7 @@ static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
 	hci_send_acl(pi->conn->hcon, skb, 0);
 }
 
-static int l2cap_streaming_send(struct sock *sk)
+static void l2cap_streaming_send(struct sock *sk)
 {
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -1450,7 +1450,6 @@ static int l2cap_streaming_send(struct sock *sk)
 		skb = skb_dequeue(TX_QUEUE(sk));
 		kfree_skb(skb);
 	}
-	return 0;
 }
 
 static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
@@ -1866,7 +1865,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		}
 
 		if (pi->mode == L2CAP_MODE_STREAMING) {
-			err = l2cap_streaming_send(sk);
+			l2cap_streaming_send(sk);
 		} else {
 			if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY &&
 					pi->conn_state && L2CAP_CONN_WAIT_F) {
-- 
cgit v1.2.3-58-ga151


From f9dd11b03c5c3cd6bdf2e503400bbc922c898974 Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:24 -0300
Subject: Bluetooth: Fix error return value on sendmsg.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we try to send a message bigger than the outgoing MTU value
EMSGSIZE (message too long) should be returned.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 33e134b0d402..884b840081ae 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1827,7 +1827,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	case L2CAP_MODE_BASIC:
 		/* Check outgoing MTU */
 		if (len > pi->omtu) {
-			err = -EINVAL;
+			err = -EMSGSIZE;
 			goto done;
 		}
 
-- 
cgit v1.2.3-58-ga151


From bc766db2ef3700ae74bdfc88d74b771b97971a24 Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:25 -0300
Subject: Bluetooth: Fix error return value on sendmsg.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the socket is in a bad state EBADFD is more appropriate then EINVAL.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 884b840081ae..ac25952538fd 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1881,7 +1881,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 
 	default:
 		BT_DBG("bad state %1.1x", pi->mode);
-		err = -EINVAL;
+		err = -EBADFD;
 	}
 
 done:
-- 
cgit v1.2.3-58-ga151


From 57d3b22bf56579bb1ab2d6f5020d372c99a7afae Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:26 -0300
Subject: Bluetooth: Fix error return for l2cap_connect_rsp().
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ac25952538fd..58c81cbb4040 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2986,11 +2986,11 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	if (scid) {
 		sk = l2cap_get_chan_by_scid(&conn->chan_list, scid);
 		if (!sk)
-			return 0;
+			return -EFAULT;
 	} else {
 		sk = l2cap_get_chan_by_ident(&conn->chan_list, cmd->ident);
 		if (!sk)
-			return 0;
+			return -EFAULT;
 	}
 
 	switch (result) {
-- 
cgit v1.2.3-58-ga151


From 7a560e5c99dc5f03e2c0dbe05ed20008af5d0bcf Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:27 -0300
Subject: Bluetooth: Fix error value for wrong FCS.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 58c81cbb4040..de545f1687d8 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3423,7 +3423,7 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi,  struct sk_buff *skb)
 		our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size);
 
 		if (our_fcs != rcv_fcs)
-			return -EINVAL;
+			return -EBADMSG;
 	}
 	return 0;
 }
-- 
cgit v1.2.3-58-ga151


From 963cf687e825f7a59817f145a1ea19bdc224a18f Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Tue, 22 Jun 2010 13:56:28 -0300
Subject: Bluetooth: Fix error return on L2CAP-HCI interface.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

L2CAP only deals with ACL links. EINVAL should be returned otherwise.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index de545f1687d8..a3dfee97ab9d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -4415,7 +4415,7 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
 	struct hlist_node *node;
 
 	if (type != ACL_LINK)
-		return 0;
+		return -EINVAL;
 
 	BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
 
@@ -4448,7 +4448,7 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
 	BT_DBG("hcon %p bdaddr %s status %d", hcon, batostr(&hcon->dst), status);
 
 	if (hcon->type != ACL_LINK)
-		return 0;
+		return -EINVAL;
 
 	if (!status) {
 		conn = l2cap_conn_add(hcon, status);
@@ -4477,7 +4477,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
 	BT_DBG("hcon %p reason %d", hcon, reason);
 
 	if (hcon->type != ACL_LINK)
-		return 0;
+		return -EINVAL;
 
 	l2cap_conn_del(hcon, bt_err(reason));
 
-- 
cgit v1.2.3-58-ga151


From e9aeb2ddd441f0c8699ff04c499d7213730a0f04 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Thu, 8 Jul 2010 20:08:18 -0300
Subject: Bluetooth: Send ConfigReq after send a ConnectionRsp

The extended L2CAP features requires that one should initiate a
ConfigReq after send the ConnectionRsp. This patch changes the behaviour
of the configuration process of our stack.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index a3dfee97ab9d..e366be0792cb 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -74,6 +74,7 @@ static void __l2cap_sock_close(struct sock *sk, int reason);
 static void l2cap_sock_close(struct sock *sk);
 static void l2cap_sock_kill(struct sock *sk);
 
+static int l2cap_build_conf_req(struct sock *sk, void *data);
 static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn,
 				u8 code, u8 ident, u16 dlen, void *data);
 
@@ -548,6 +549,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 			}
 		} else if (sk->sk_state == BT_CONNECT2) {
 			struct l2cap_conn_rsp rsp;
+			char buf[128];
 			rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
 			rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
 
@@ -570,6 +572,17 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 
 			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+
+			if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT ||
+					rsp.result != L2CAP_CR_SUCCESS) {
+				bh_unlock_sock(sk);
+				continue;
+			}
+
+			l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+			l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+						l2cap_build_conf_req(sk, buf), buf);
+			l2cap_pi(sk)->num_conf_req++;
 		}
 
 		bh_unlock_sock(sk);
@@ -1897,6 +1910,8 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
 
 	if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
 		struct l2cap_conn_rsp rsp;
+		struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+		u8 buf[128];
 
 		sk->sk_state = BT_CONFIG;
 
@@ -1907,6 +1922,16 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
 					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
 
+		if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) {
+			release_sock(sk);
+			return 0;
+		}
+
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+				l2cap_build_conf_req(sk, buf), buf);
+		l2cap_pi(sk)->num_conf_req++;
+
 		release_sock(sk);
 		return 0;
 	}
@@ -2613,7 +2638,7 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data)
 		}
 	}
 
-	if (pi->num_conf_rsp || pi->num_conf_req)
+	if (pi->num_conf_rsp || pi->num_conf_req > 1)
 		goto done;
 
 	switch (pi->mode) {
@@ -2857,7 +2882,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
 	struct l2cap_chan_list *list = &conn->chan_list;
 	struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
 	struct l2cap_conn_rsp rsp;
-	struct sock *sk, *parent;
+	struct sock *parent, *uninitialized_var(sk);
 	int result, status = L2CAP_CS_NO_INFO;
 
 	u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -2966,6 +2991,15 @@ sendresp:
 					L2CAP_INFO_REQ, sizeof(info), &info);
 	}
 
+	if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
+				result == L2CAP_CR_SUCCESS) {
+		u8 buf[128];
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
+					l2cap_build_conf_req(sk, buf), buf);
+		l2cap_pi(sk)->num_conf_req++;
+	}
+
 	return 0;
 }
 
@@ -2998,9 +3032,13 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
 		sk->sk_state = BT_CONFIG;
 		l2cap_pi(sk)->ident = 0;
 		l2cap_pi(sk)->dcid = dcid;
-		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
 		l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
 
+		if (l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)
+			break;
+
+		l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
+
 		l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
 					l2cap_build_conf_req(sk, req), req);
 		l2cap_pi(sk)->num_conf_req++;
-- 
cgit v1.2.3-58-ga151


From 89746b856c88af9e5019e84615d88e002fb54dc3 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Fri, 9 Jul 2010 16:38:34 -0300
Subject: Bluetooth: Fix bug in kzalloc allocation size

Probably a typo error. We were using the wrong struct to get size.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e366be0792cb..419e2c3306aa 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -530,7 +530,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 						conn->feat_mask)
 						&& l2cap_pi(sk)->conf_state &
 						L2CAP_CONF_STATE2_DEVICE) {
-					tmp1 = kzalloc(sizeof(struct srej_list),
+					tmp1 = kzalloc(sizeof(struct sock_del_list),
 							GFP_ATOMIC);
 					tmp1->sk = sk;
 					list_add_tail(&tmp1->list, &del.list);
-- 
cgit v1.2.3-58-ga151


From 47731de789749c9ed3c54751db28fd9c9eeaf019 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Fri, 9 Jul 2010 16:38:35 -0300
Subject: Bluetooth: Keep code under column 80

Purely a cosmetic change, it doesn't change the code flow.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 419e2c3306aa..449cbdd4ddbb 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -522,31 +522,35 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 		}
 
 		if (sk->sk_state == BT_CONNECT) {
-			if (l2cap_check_security(sk) &&
-					__l2cap_no_conn_pending(sk)) {
-				struct l2cap_conn_req req;
+			struct l2cap_conn_req req;
 
-				if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
-						conn->feat_mask)
-						&& l2cap_pi(sk)->conf_state &
-						L2CAP_CONF_STATE2_DEVICE) {
-					tmp1 = kzalloc(sizeof(struct sock_del_list),
-							GFP_ATOMIC);
-					tmp1->sk = sk;
-					list_add_tail(&tmp1->list, &del.list);
-					bh_unlock_sock(sk);
-					continue;
-				}
+			if (!l2cap_check_security(sk) ||
+					!__l2cap_no_conn_pending(sk)) {
+				bh_unlock_sock(sk);
+				continue;
+			}
 
-				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
-				req.psm  = l2cap_pi(sk)->psm;
+			if (!l2cap_mode_supported(l2cap_pi(sk)->mode,
+					conn->feat_mask)
+					&& l2cap_pi(sk)->conf_state &
+					L2CAP_CONF_STATE2_DEVICE) {
+				tmp1 = kzalloc(sizeof(struct sock_del_list),
+						GFP_ATOMIC);
+				tmp1->sk = sk;
+				list_add_tail(&tmp1->list, &del.list);
+				bh_unlock_sock(sk);
+				continue;
+			}
 
-				l2cap_pi(sk)->ident = l2cap_get_ident(conn);
-				l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
+			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
+			req.psm  = l2cap_pi(sk)->psm;
+
+			l2cap_pi(sk)->ident = l2cap_get_ident(conn);
+			l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
+
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+				L2CAP_CONN_REQ, sizeof(req), &req);
 
-				l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
-					L2CAP_CONN_REQ, sizeof(req), &req);
-			}
 		} else if (sk->sk_state == BT_CONNECT2) {
 			struct l2cap_conn_rsp rsp;
 			char buf[128];
-- 
cgit v1.2.3-58-ga151


From ce5706bd69be6b25715ed6cd48a210b5080032bc Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 13 Jul 2010 11:57:11 -0300
Subject: Bluetooth: Add Copyright notice to L2CAP

Copyright for the time I worked on L2CAP during the Google Summer of Code
program.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 net/bluetooth/l2cap.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 7df70e47c878..d0303021aba7 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -1,6 +1,7 @@
 /* 
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 449cbdd4ddbb..67a6f59873aa 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1,6 +1,7 @@
 /*
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
+   Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-58-ga151


From dd135240e8e10295f7e7cdf347800df6e1841437 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 13 Jul 2010 11:57:12 -0300
Subject: Bluetooth: Update L2CAP version information

We did some changes on the L2CAP configuration process and its behaviour
is bit different now. That justifies a updated on the L2CAP version.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 67a6f59873aa..cdd608d72741 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -54,7 +54,7 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/l2cap.h>
 
-#define VERSION "2.14"
+#define VERSION "2.15"
 
 static int enable_ertm = 0;
 
-- 
cgit v1.2.3-58-ga151


From cd4c53919ed50b0d532f106aeb76e79077bece98 Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:16 +0530
Subject: Bluetooth: Add one more buffer for HCI stream reassembly

Additional reassembly buffer to keep track of stream reasembly

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 4 ++--
 net/bluetooth/hci_core.c         | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 600372d44578..28e5eeefdec8 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -66,7 +66,7 @@ struct bdaddr_list {
 	struct list_head list;
 	bdaddr_t bdaddr;
 };
-
+#define NUM_REASSEMBLY 4
 struct hci_dev {
 	struct list_head list;
 	spinlock_t	lock;
@@ -123,7 +123,7 @@ struct hci_dev {
 	struct sk_buff_head	cmd_q;
 
 	struct sk_buff		*sent_cmd;
-	struct sk_buff		*reassembly[3];
+	struct sk_buff		*reassembly[NUM_REASSEMBLY];
 
 	struct mutex		req_lock;
 	wait_queue_head_t	req_wait_q;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index aeb2982310a0..0ded790bfb67 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -914,7 +914,7 @@ int hci_register_dev(struct hci_dev *hdev)
 	skb_queue_head_init(&hdev->cmd_q);
 	skb_queue_head_init(&hdev->raw_q);
 
-	for (i = 0; i < 3; i++)
+	for (i = 0; i < NUM_REASSEMBLY; i++)
 		hdev->reassembly[i] = NULL;
 
 	init_waitqueue_head(&hdev->req_wait_q);
@@ -973,7 +973,7 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_dev_do_close(hdev);
 
-	for (i = 0; i < 3; i++)
+	for (i = 0; i < NUM_REASSEMBLY; i++)
 		kfree_skb(hdev->reassembly[i]);
 
 	hci_notify(hdev, HCI_DEV_UNREG);
@@ -1034,7 +1034,7 @@ int hci_recv_frame(struct sk_buff *skb)
 EXPORT_SYMBOL(hci_recv_frame);
 
 /* Receive packet type fragment */
-#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 2])
+#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
 
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 {
-- 
cgit v1.2.3-58-ga151


From 33e882a5f2301a23a85ef2994e30fd9f48d39d9b Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:17 +0530
Subject: Bluetooth: Implement hci_reassembly helper to reassemble RX packets

Implements feature to reassemble received HCI frames from any input stream

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |   1 +
 net/bluetooth/hci_core.c          | 109 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ff77e8f882f1..d6b150c679ff 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -138,6 +138,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
 struct bt_skb_cb {
 	__u8 pkt_type;
 	__u8 incoming;
+	__u16 expect;
 	__u8 tx_seq;
 	__u8 retries;
 	__u8 sar;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0ded790bfb67..477c4a60a079 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1033,6 +1033,115 @@ int hci_recv_frame(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(hci_recv_frame);
 
+static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
+			  int count, __u8 index, gfp_t gfp_mask)
+{
+	int len = 0;
+	int hlen = 0;
+	int remain = count;
+	struct sk_buff *skb;
+	struct bt_skb_cb *scb;
+
+	if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) ||
+				index >= NUM_REASSEMBLY)
+		return -EILSEQ;
+
+	skb = hdev->reassembly[index];
+
+	if (!skb) {
+		switch (type) {
+		case HCI_ACLDATA_PKT:
+			len = HCI_MAX_FRAME_SIZE;
+			hlen = HCI_ACL_HDR_SIZE;
+			break;
+		case HCI_EVENT_PKT:
+			len = HCI_MAX_EVENT_SIZE;
+			hlen = HCI_EVENT_HDR_SIZE;
+			break;
+		case HCI_SCODATA_PKT:
+			len = HCI_MAX_SCO_SIZE;
+			hlen = HCI_SCO_HDR_SIZE;
+			break;
+		}
+
+		skb = bt_skb_alloc(len, gfp_mask);
+		if (!skb)
+			return -ENOMEM;
+
+		scb = (void *) skb->cb;
+		scb->expect = hlen;
+		scb->pkt_type = type;
+
+		skb->dev = (void *) hdev;
+		hdev->reassembly[index] = skb;
+	}
+
+	while (count) {
+		scb = (void *) skb->cb;
+		len = min(scb->expect, (__u16)count);
+
+		memcpy(skb_put(skb, len), data, len);
+
+		count -= len;
+		data += len;
+		scb->expect -= len;
+		remain = count;
+
+		switch (type) {
+		case HCI_EVENT_PKT:
+			if (skb->len == HCI_EVENT_HDR_SIZE) {
+				struct hci_event_hdr *h = hci_event_hdr(skb);
+				scb->expect = h->plen;
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+
+		case HCI_ACLDATA_PKT:
+			if (skb->len  == HCI_ACL_HDR_SIZE) {
+				struct hci_acl_hdr *h = hci_acl_hdr(skb);
+				scb->expect = __le16_to_cpu(h->dlen);
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+
+		case HCI_SCODATA_PKT:
+			if (skb->len == HCI_SCO_HDR_SIZE) {
+				struct hci_sco_hdr *h = hci_sco_hdr(skb);
+				scb->expect = h->dlen;
+
+				if (skb_tailroom(skb) < scb->expect) {
+					kfree_skb(skb);
+					hdev->reassembly[index] = NULL;
+					return -ENOMEM;
+				}
+			}
+			break;
+		}
+
+		if (scb->expect == 0) {
+			/* Complete frame */
+
+			bt_cb(skb)->pkt_type = type;
+			hci_recv_frame(skb);
+
+			hdev->reassembly[index] = NULL;
+			return remain;
+		}
+	}
+
+	return remain;
+}
+
 /* Receive packet type fragment */
 #define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
 
-- 
cgit v1.2.3-58-ga151


From f39a3c06404d01ef2ce47e821bc778dfb1836df9 Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:18 +0530
Subject: Bluetooth: Modified hci_recv_fragment() to use hci_reassembly helper

Modified packet based reassembly function hci_recv_fragment() to use
hci_reassembly()

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_core.c | 85 +++++++-----------------------------------------
 1 file changed, 11 insertions(+), 74 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 477c4a60a079..451e266840ad 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1142,87 +1142,24 @@ static int hci_reassembly(struct hci_dev *hdev, int type, void *data,
 	return remain;
 }
 
-/* Receive packet type fragment */
-#define __reassembly(hdev, type)  ((hdev)->reassembly[(type) - 1])
-
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 {
+	int rem = 0;
+
 	if (type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT)
 		return -EILSEQ;
 
-	while (count) {
-		struct sk_buff *skb = __reassembly(hdev, type);
-		struct { int expect; } *scb;
-		int len = 0;
-
-		if (!skb) {
-			/* Start of the frame */
-
-			switch (type) {
-			case HCI_EVENT_PKT:
-				if (count >= HCI_EVENT_HDR_SIZE) {
-					struct hci_event_hdr *h = data;
-					len = HCI_EVENT_HDR_SIZE + h->plen;
-				} else
-					return -EILSEQ;
-				break;
-
-			case HCI_ACLDATA_PKT:
-				if (count >= HCI_ACL_HDR_SIZE) {
-					struct hci_acl_hdr *h = data;
-					len = HCI_ACL_HDR_SIZE + __le16_to_cpu(h->dlen);
-				} else
-					return -EILSEQ;
-				break;
-
-			case HCI_SCODATA_PKT:
-				if (count >= HCI_SCO_HDR_SIZE) {
-					struct hci_sco_hdr *h = data;
-					len = HCI_SCO_HDR_SIZE + h->dlen;
-				} else
-					return -EILSEQ;
-				break;
-			}
-
-			skb = bt_skb_alloc(len, GFP_ATOMIC);
-			if (!skb) {
-				BT_ERR("%s no memory for packet", hdev->name);
-				return -ENOMEM;
-			}
-
-			skb->dev = (void *) hdev;
-			bt_cb(skb)->pkt_type = type;
-
-			__reassembly(hdev, type) = skb;
-
-			scb = (void *) skb->cb;
-			scb->expect = len;
-		} else {
-			/* Continuation */
-
-			scb = (void *) skb->cb;
-			len = scb->expect;
-		}
-
-		len = min(len, count);
-
-		memcpy(skb_put(skb, len), data, len);
-
-		scb->expect -= len;
-
-		if (scb->expect == 0) {
-			/* Complete frame */
+	do {
+		rem = hci_reassembly(hdev, type, data, count,
+						type - 1, GFP_ATOMIC);
+		if (rem < 0)
+			return rem;
 
-			__reassembly(hdev, type) = NULL;
+		data += (count - rem);
+		count = rem;
+	} while (count);
 
-			bt_cb(skb)->pkt_type = type;
-			hci_recv_frame(skb);
-		}
-
-		count -= len; data += len;
-	}
-
-	return 0;
+	return rem;
 }
 EXPORT_SYMBOL(hci_recv_fragment);
 
-- 
cgit v1.2.3-58-ga151


From 9981151086385eecc2febf4ba95a14593f834b3d Mon Sep 17 00:00:00 2001
From: Suraj Sumangala <suraj@atheros.com>
Date: Wed, 14 Jul 2010 13:02:19 +0530
Subject: Bluetooth: Implemented HCI frame reassembly for RX from stream

Implemented frame reassembly implementation for reassembling fragments
received from stream.

Signed-off-by: Suraj Sumangala <suraj@atheros.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 28e5eeefdec8..350b3e6964bd 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -437,6 +437,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb);
 
 int hci_recv_frame(struct sk_buff *skb);
 int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count);
+int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count);
 
 int hci_register_sysfs(struct hci_dev *hdev);
 void hci_unregister_sysfs(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 451e266840ad..995c9f9b84d0 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1163,6 +1163,41 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count)
 }
 EXPORT_SYMBOL(hci_recv_fragment);
 
+#define STREAM_REASSEMBLY 0
+
+int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count)
+{
+	int type;
+	int rem = 0;
+
+	do {
+		struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY];
+
+		if (!skb) {
+			struct { char type; } *pkt;
+
+			/* Start of the frame */
+			pkt = data;
+			type = pkt->type;
+
+			data++;
+			count--;
+		} else
+			type = bt_cb(skb)->pkt_type;
+
+		rem = hci_reassembly(hdev, type, data,
+					count, STREAM_REASSEMBLY, GFP_ATOMIC);
+		if (rem < 0)
+			return rem;
+
+		data += (count - rem);
+		count = rem;
+	} while (count);
+
+	return rem;
+}
+EXPORT_SYMBOL(hci_recv_stream_fragment);
+
 /* ---- Interface to upper protocols ---- */
 
 /* Register/Unregister protocols.
-- 
cgit v1.2.3-58-ga151


From 5d8868ff3d11e3fc5a5c07477f281a16c71714a3 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Fri, 16 Jul 2010 16:18:39 -0300
Subject: Bluetooth: Add Google's copyright to L2CAP

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 1 +
 net/bluetooth/l2cap.c         | 1 +
 2 files changed, 2 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d0303021aba7..4b86c783976c 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -2,6 +2,7 @@
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
    Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cdd608d72741..d175cc262833 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2,6 +2,7 @@
    BlueZ - Bluetooth protocol stack for Linux
    Copyright (C) 2000-2001 Qualcomm Incorporated
    Copyright (C) 2009-2010 Gustavo F. Padovan <gustavo@padovan.org>
+   Copyright (C) 2010 Google Inc.
 
    Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com>
 
-- 
cgit v1.2.3-58-ga151


From 893ef9711286d01763f3c175385d70076d5f9546 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sun, 18 Jul 2010 15:13:37 -0300
Subject: Bluetooth: Fix typo in hci_event.c

memmory -> memory

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index a96980022305..2069c3b05fda 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -584,7 +584,7 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
 				conn->out = 1;
 				conn->link_mode |= HCI_LM_MASTER;
 			} else
-				BT_ERR("No memmory for new connection");
+				BT_ERR("No memory for new connection");
 		}
 	}
 
@@ -965,7 +965,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk
 		conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr);
 		if (!conn) {
 			if (!(conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr))) {
-				BT_ERR("No memmory for new connection");
+				BT_ERR("No memory for new connection");
 				hci_dev_unlock(hdev);
 				return;
 			}
-- 
cgit v1.2.3-58-ga151


From d1c4a17d58a6dfacb48935aa430aa986559a885f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sun, 18 Jul 2010 16:25:54 -0300
Subject: Bluetooth: Enable L2CAP Extended features by default

Change the enable_ertm param to disable_ertm and default value to 0. That
means that L2CAP Extended features are enabled by default now.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index d175cc262833..9ba1e8eee37c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -57,7 +57,7 @@
 
 #define VERSION "2.15"
 
-static int enable_ertm = 0;
+static int disable_ertm = 0;
 
 static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
@@ -464,7 +464,7 @@ static void l2cap_do_start(struct sock *sk)
 static inline int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
 {
 	u32 local_feat_mask = l2cap_feat_mask;
-	if (enable_ertm)
+	if (!disable_ertm)
 		local_feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING;
 
 	switch (mode) {
@@ -903,7 +903,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 	} else {
 		pi->imtu = L2CAP_DEFAULT_MTU;
 		pi->omtu = 0;
-		if (enable_ertm && sk->sk_type == SOCK_STREAM) {
+		if (!disable_ertm && sk->sk_type == SOCK_STREAM) {
 			pi->mode = L2CAP_MODE_ERTM;
 			pi->conf_state |= L2CAP_CONF_STATE2_DEVICE;
 		} else {
@@ -1160,7 +1160,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
 		break;
 	case L2CAP_MODE_ERTM:
 	case L2CAP_MODE_STREAMING:
-		if (enable_ertm)
+		if (!disable_ertm)
 			break;
 		/* fall through */
 	default:
@@ -1226,7 +1226,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
 		break;
 	case L2CAP_MODE_ERTM:
 	case L2CAP_MODE_STREAMING:
-		if (enable_ertm)
+		if (!disable_ertm)
 			break;
 		/* fall through */
 	default:
@@ -1986,7 +1986,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 			break;
 		case L2CAP_MODE_ERTM:
 		case L2CAP_MODE_STREAMING:
-			if (enable_ertm)
+			if (!disable_ertm)
 				break;
 			/* fall through */
 		default:
@@ -3302,7 +3302,7 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
 		struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
 		rsp->type   = cpu_to_le16(L2CAP_IT_FEAT_MASK);
 		rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
-		if (enable_ertm)
+		if (!disable_ertm)
 			feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING
 							 | L2CAP_FEAT_FCS;
 		put_unaligned_le32(feat_mask, rsp->data);
@@ -4850,8 +4850,8 @@ EXPORT_SYMBOL(l2cap_load);
 module_init(l2cap_init);
 module_exit(l2cap_exit);
 
-module_param(enable_ertm, bool, 0644);
-MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode");
+module_param(disable_ertm, bool, 0644);
+MODULE_PARM_DESC(disable_ertm, "Disable enhanced retransmission mode");
 
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
-- 
cgit v1.2.3-58-ga151