summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c56
-rw-r--r--net/core/dst.c6
-rw-r--r--net/core/ethtool.c309
-rw-r--r--net/core/fib_rules.c6
-rw-r--r--net/core/neighbour.c11
-rw-r--r--net/core/net-sysfs.c23
-rw-r--r--net/core/net-traces.c2
-rw-r--r--net/core/net_namespace.c76
-rw-r--r--net/core/netpoll.c20
-rw-r--r--net/core/rtnetlink.c69
-rw-r--r--net/core/skbuff.c80
-rw-r--r--net/core/sock.c5
-rw-r--r--net/core/timestamping.c2
13 files changed, 284 insertions, 381 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index c7e305d13b71..9ca15142d823 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -199,6 +199,11 @@ static struct list_head ptype_all __read_mostly; /* Taps */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static inline void dev_base_seq_inc(struct net *net)
+{
+ while (++net->dev_base_seq == 0);
+}
+
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
@@ -237,6 +242,9 @@ static int list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
write_unlock_bh(&dev_base_lock);
+
+ dev_base_seq_inc(net);
+
return 0;
}
@@ -253,6 +261,8 @@ static void unlist_netdevice(struct net_device *dev)
hlist_del_rcu(&dev->name_hlist);
hlist_del_rcu(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
+
+ dev_base_seq_inc(dev_net(dev));
}
/*
@@ -2096,6 +2106,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
+ unsigned int skb_len;
if (likely(!skb->next)) {
u32 features;
@@ -2146,8 +2157,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
+ skb_len = skb->len;
rc = ops->ndo_start_xmit(skb, dev);
- trace_net_dev_xmit(skb, rc);
+ trace_net_dev_xmit(skb, rc, dev, skb_len);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
@@ -2167,8 +2179,9 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ skb_len = nskb->len;
rc = ops->ndo_start_xmit(nskb, dev);
- trace_net_dev_xmit(nskb, rc);
+ trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
@@ -2529,7 +2542,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb)
goto done;
ip = (const struct iphdr *) (skb->data + nhoff);
- if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+ if (ip_is_fragment(ip))
ip_proto = 0;
else
ip_proto = ip->protocol;
@@ -3111,7 +3124,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
- skb->mac_len = skb->network_header - skb->mac_header;
+ skb_reset_mac_len(skb);
pt_prev = NULL;
@@ -5475,11 +5488,12 @@ int register_netdevice(struct net_device *dev)
dev->features |= NETIF_F_NOCACHE_COPY;
}
- /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
- * vlan_dev_init() will do the dev->features check, so these features
- * are enabled only if supported by underlying device.
+ /* Enable GSO, GRO and NETIF_F_HIGHDMA for vlans by default,
+ * vlan_dev_fix_features() will do the features check,
+ * so NETIF_F_HIGHDMA feature is enabled only if supported
+ * by underlying device.
*/
- dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
+ dev->vlan_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_HIGHDMA);
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret);
@@ -5864,8 +5878,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
- INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
- dev->ethtool_ntuple_list.count = 0;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
@@ -5929,9 +5941,6 @@ void free_netdev(struct net_device *dev)
/* Flush device addresses */
dev_addr_flush(dev);
- /* Clear ethtool n-tuple list */
- ethtool_ntuple_flush(dev);
-
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
@@ -6175,6 +6184,11 @@ static int dev_cpu_callback(struct notifier_block *nfb,
oldsd->output_queue = NULL;
oldsd->output_queue_tailp = &oldsd->output_queue;
}
+ /* Append NAPI poll list from offline CPU. */
+ if (!list_empty(&oldsd->poll_list)) {
+ list_splice_init(&oldsd->poll_list, &sd->poll_list);
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ }
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
@@ -6261,29 +6275,23 @@ err_name:
/**
* netdev_drivername - network driver for the device
* @dev: network device
- * @buffer: buffer for resulting name
- * @len: size of buffer
*
* Determine network driver for device.
*/
-char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
+const char *netdev_drivername(const struct net_device *dev)
{
const struct device_driver *driver;
const struct device *parent;
-
- if (len <= 0 || !buffer)
- return buffer;
- buffer[0] = 0;
+ const char *empty = "";
parent = dev->dev.parent;
-
if (!parent)
- return buffer;
+ return empty;
driver = parent->driver;
if (driver && driver->name)
- strlcpy(buffer, driver->name, len);
- return buffer;
+ return driver->name;
+ return empty;
}
static int __netdev_printk(const char *level, const struct net_device *dev,
diff --git a/net/core/dst.c b/net/core/dst.c
index 9ccca038444f..6135f3671692 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -190,7 +190,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->lastuse = jiffies;
dst->flags = flags;
dst->next = NULL;
- dst_entries_add(ops, 1);
+ if (!(flags & DST_NOCOUNT))
+ dst_entries_add(ops, 1);
return dst;
}
EXPORT_SYMBOL(dst_alloc);
@@ -243,7 +244,8 @@ again:
neigh_release(neigh);
}
- dst_entries_add(dst->ops, -1);
+ if (!(dst->flags & DST_NOCOUNT))
+ dst_entries_add(dst->ops, -1);
if (dst->ops->destroy)
dst->ops->destroy(dst);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index fd14116ad7f0..b7c12a63d0ce 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -169,18 +169,6 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
}
EXPORT_SYMBOL(ethtool_op_set_flags);
-void ethtool_ntuple_flush(struct net_device *dev)
-{
- struct ethtool_rx_ntuple_flow_spec_container *fsc, *f;
-
- list_for_each_entry_safe(fsc, f, &dev->ethtool_ntuple_list.list, list) {
- list_del(&fsc->list);
- kfree(fsc);
- }
- dev->ethtool_ntuple_list.count = 0;
-}
-EXPORT_SYMBOL(ethtool_ntuple_flush);
-
/* Handlers for each ethtool command */
#define ETHTOOL_DEV_FEATURE_WORDS 1
@@ -865,34 +853,6 @@ out:
return ret;
}
-static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
- struct ethtool_rx_ntuple_flow_spec *spec,
- struct ethtool_rx_ntuple_flow_spec_container *fsc)
-{
-
- /* don't add filters forever */
- if (list->count >= ETHTOOL_MAX_NTUPLE_LIST_ENTRY) {
- /* free the container */
- kfree(fsc);
- return;
- }
-
- /* Copy the whole filter over */
- fsc->fs.flow_type = spec->flow_type;
- memcpy(&fsc->fs.h_u, &spec->h_u, sizeof(spec->h_u));
- memcpy(&fsc->fs.m_u, &spec->m_u, sizeof(spec->m_u));
-
- fsc->fs.vlan_tag = spec->vlan_tag;
- fsc->fs.vlan_tag_mask = spec->vlan_tag_mask;
- fsc->fs.data = spec->data;
- fsc->fs.data_mask = spec->data_mask;
- fsc->fs.action = spec->action;
-
- /* add to the list */
- list_add_tail_rcu(&fsc->list, &list->list);
- list->count++;
-}
-
/*
* ethtool does not (or did not) set masks for flow parameters that are
* not specified, so if both value and mask are 0 then this must be
@@ -930,8 +890,6 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
{
struct ethtool_rx_ntuple cmd;
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rx_ntuple_flow_spec_container *fsc = NULL;
- int ret;
if (!ops->set_rx_ntuple)
return -EOPNOTSUPP;
@@ -944,269 +902,7 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
rx_ntuple_fix_masks(&cmd.fs);
- /*
- * Cache filter in dev struct for GET operation only if
- * the underlying driver doesn't have its own GET operation, and
- * only if the filter was added successfully. First make sure we
- * can allocate the filter, then continue if successful.
- */
- if (!ops->get_rx_ntuple) {
- fsc = kmalloc(sizeof(*fsc), GFP_ATOMIC);
- if (!fsc)
- return -ENOMEM;
- }
-
- ret = ops->set_rx_ntuple(dev, &cmd);
- if (ret) {
- kfree(fsc);
- return ret;
- }
-
- if (!ops->get_rx_ntuple)
- __rx_ntuple_filter_add(&dev->ethtool_ntuple_list, &cmd.fs, fsc);
-
- return ret;
-}
-
-static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
-{
- struct ethtool_gstrings gstrings;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- struct ethtool_rx_ntuple_flow_spec_container *fsc;
- u8 *data;
- char *p;
- int ret, i, num_strings = 0;
-
- if (!ops->get_sset_count)
- return -EOPNOTSUPP;
-
- if (copy_from_user(&gstrings, useraddr, sizeof(gstrings)))
- return -EFAULT;
-
- ret = ops->get_sset_count(dev, gstrings.string_set);
- if (ret < 0)
- return ret;
-
- gstrings.len = ret;
-
- data = kzalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
- if (!data)
- return -ENOMEM;
-
- if (ops->get_rx_ntuple) {
- /* driver-specific filter grab */
- ret = ops->get_rx_ntuple(dev, gstrings.string_set, data);
- goto copy;
- }
-
- /* default ethtool filter grab */
- i = 0;
- p = (char *)data;
- list_for_each_entry(fsc, &dev->ethtool_ntuple_list.list, list) {
- sprintf(p, "Filter %d:\n", i);
- p += ETH_GSTRING_LEN;
- num_strings++;
-
- switch (fsc->fs.flow_type) {
- case TCP_V4_FLOW:
- sprintf(p, "\tFlow Type: TCP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case UDP_V4_FLOW:
- sprintf(p, "\tFlow Type: UDP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case SCTP_V4_FLOW:
- sprintf(p, "\tFlow Type: SCTP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case AH_ESP_V4_FLOW:
- sprintf(p, "\tFlow Type: AH ESP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case ESP_V4_FLOW:
- sprintf(p, "\tFlow Type: ESP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IP_USER_FLOW:
- sprintf(p, "\tFlow Type: Raw IP\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IPV4_FLOW:
- sprintf(p, "\tFlow Type: IPv4\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- default:
- sprintf(p, "\tFlow Type: Unknown\n");
- p += ETH_GSTRING_LEN;
- num_strings++;
- goto unknown_filter;
- }
-
- /* now the rest of the filters */
- switch (fsc->fs.flow_type) {
- case TCP_V4_FLOW:
- case UDP_V4_FLOW:
- case SCTP_V4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.tcp_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.psrc,
- fsc->fs.m_u.tcp_ip4_spec.psrc);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.pdst,
- fsc->fs.m_u.tcp_ip4_spec.pdst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.tcp_ip4_spec.tos,
- fsc->fs.m_u.tcp_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case AH_ESP_V4_FLOW:
- case ESP_V4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.ah_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSPI: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.spi,
- fsc->fs.m_u.ah_ip4_spec.spi);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.ah_ip4_spec.tos,
- fsc->fs.m_u.ah_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IP_USER_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- case IPV4_FLOW:
- sprintf(p, "\tSrc IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tSrc IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4src);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP addr: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tDest IP mask: 0x%x\n",
- fsc->fs.m_u.usr_ip4_spec.ip4dst);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
- fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tTOS: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.tos,
- fsc->fs.m_u.usr_ip4_spec.tos);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.ip_ver,
- fsc->fs.m_u.usr_ip4_spec.ip_ver);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
- fsc->fs.h_u.usr_ip4_spec.proto,
- fsc->fs.m_u.usr_ip4_spec.proto);
- p += ETH_GSTRING_LEN;
- num_strings++;
- break;
- }
- sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
- fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
- p += ETH_GSTRING_LEN;
- num_strings++;
- sprintf(p, "\tUser-defined mask: 0x%Lx\n", fsc->fs.data_mask);
- p += ETH_GSTRING_LEN;
- num_strings++;
- if (fsc->fs.action == ETHTOOL_RXNTUPLE_ACTION_DROP)
- sprintf(p, "\tAction: Drop\n");
- else
- sprintf(p, "\tAction: Direct to queue %d\n",
- fsc->fs.action);
- p += ETH_GSTRING_LEN;
- num_strings++;
-unknown_filter:
- i++;
- }
-copy:
- /* indicate to userspace how many strings we actually have */
- gstrings.len = num_strings;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &gstrings, sizeof(gstrings)))
- goto out;
- useraddr += sizeof(gstrings);
- if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN))
- goto out;
- ret = 0;
-
-out:
- kfree(data);
- return ret;
+ return ops->set_rx_ntuple(dev, &cmd);
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -2101,9 +1797,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_SRXNTUPLE:
rc = ethtool_set_rx_ntuple(dev, useraddr);
break;
- case ETHTOOL_GRXNTUPLE:
- rc = ethtool_get_rx_ntuple(dev, useraddr);
- break;
case ETHTOOL_GSSET_INFO:
rc = ethtool_get_sset_info(dev, useraddr);
break;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 008dc70b064b..e7ab0c0285b5 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -740,9 +740,9 @@ static struct pernet_operations fib_rules_net_ops = {
static int __init fib_rules_init(void)
{
int err;
- rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+ rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL);
err = register_pernet_subsys(&fib_rules_net_ops);
if (err < 0)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 799f06e03a22..ceb505b1507c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2909,12 +2909,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister);
static int __init neigh_init(void)
{
- rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
+ rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
- rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
+ NULL);
+ rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
return 0;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 11b98bc2aa8f..33d2a1fba131 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1179,9 +1179,14 @@ static void remove_queue_kobjects(struct net_device *net)
#endif
}
-static const void *net_current_ns(void)
+static void *net_grab_current_ns(void)
{
- return current->nsproxy->net_ns;
+ struct net *ns = current->nsproxy->net_ns;
+#ifdef CONFIG_NET_NS
+ if (ns)
+ atomic_inc(&ns->passive);
+#endif
+ return ns;
}
static const void *net_initial_ns(void)
@@ -1196,22 +1201,13 @@ static const void *net_netlink_ns(struct sock *sk)
struct kobj_ns_type_operations net_ns_type_operations = {
.type = KOBJ_NS_TYPE_NET,
- .current_ns = net_current_ns,
+ .grab_current_ns = net_grab_current_ns,
.netlink_ns = net_netlink_ns,
.initial_ns = net_initial_ns,
+ .drop_ns = net_drop_ns,
};
EXPORT_SYMBOL_GPL(net_ns_type_operations);
-static void net_kobj_ns_exit(struct net *net)
-{
- kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
-}
-
-static struct pernet_operations kobj_net_ops = {
- .exit = net_kobj_ns_exit,
-};
-
-
#ifdef CONFIG_HOTPLUG
static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
{
@@ -1339,6 +1335,5 @@ EXPORT_SYMBOL(netdev_class_remove_file);
int netdev_kobject_init(void)
{
kobj_ns_type_register(&net_ns_type_operations);
- register_pernet_subsys(&kobj_net_ops);
return class_register(&net_class);
}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 7f1bb2aba03b..52380b1d552a 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -28,6 +28,8 @@
#include <trace/events/skb.h>
#include <trace/events/net.h>
#include <trace/events/napi.h>
+#include <trace/events/sock.h>
+#include <trace/events/udp.h>
EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2e2dce6583e1..5bbdbf0d3664 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -126,6 +128,8 @@ static __net_init int setup_net(struct net *net)
LIST_HEAD(net_exit_list);
atomic_set(&net->count, 1);
+ atomic_set(&net->passive, 1);
+ net->dev_base_seq = 1;
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
@@ -208,6 +212,13 @@ static void net_free(struct net *net)
kmem_cache_free(net_cachep, net);
}
+void net_drop_ns(void *p)
+{
+ struct net *ns = p;
+ if (ns && atomic_dec_and_test(&ns->passive))
+ net_free(ns);
+}
+
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
struct net *net;
@@ -228,7 +239,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
}
mutex_unlock(&net_mutex);
if (rv < 0) {
- net_free(net);
+ net_drop_ns(net);
return ERR_PTR(rv);
}
return net;
@@ -284,7 +295,7 @@ static void cleanup_net(struct work_struct *work)
/* Finally it is safe to free my network namespace structure */
list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
list_del_init(&net->exit_list);
- net_free(net);
+ net_drop_ns(net);
}
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);
@@ -302,6 +313,26 @@ void __put_net(struct net *net)
}
EXPORT_SYMBOL_GPL(__put_net);
+struct net *get_net_ns_by_fd(int fd)
+{
+ struct proc_inode *ei;
+ struct file *file;
+ struct net *net;
+
+ file = proc_ns_fget(fd);
+ if (IS_ERR(file))
+ return ERR_CAST(file);
+
+ ei = PROC_I(file->f_dentry->d_inode);
+ if (ei->ns_ops == &netns_operations)
+ net = get_net(ei->ns);
+ else
+ net = ERR_PTR(-EINVAL);
+
+ fput(file);
+ return net;
+}
+
#else
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
@@ -309,6 +340,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
return ERR_PTR(-EINVAL);
return old_net;
}
+
+struct net *get_net_ns_by_fd(int fd)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif
struct net *get_net_ns_by_pid(pid_t pid)
@@ -561,3 +597,39 @@ void unregister_pernet_device(struct pernet_operations *ops)
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+ struct net *net = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ net = get_net(nsproxy->net_ns);
+ rcu_read_unlock();
+
+ return net;
+}
+
+static void netns_put(void *ns)
+{
+ put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+ put_net(nsproxy->net_ns);
+ nsproxy->net_ns = get_net(ns);
+ return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+ .name = "net",
+ .type = CLONE_NEWNET,
+ .get = netns_get,
+ .put = netns_put,
+ .install = netns_install,
+};
+#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 2d7d6d473781..adf84dd8c7b5 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -177,7 +177,7 @@ static void service_arp_queue(struct netpoll_info *npi)
}
}
-void netpoll_poll_dev(struct net_device *dev)
+static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
@@ -208,13 +208,6 @@ void netpoll_poll_dev(struct net_device *dev)
zap_completion_queue();
}
-EXPORT_SYMBOL(netpoll_poll_dev);
-
-void netpoll_poll(struct netpoll *np)
-{
- netpoll_poll_dev(np->dev);
-}
-EXPORT_SYMBOL(netpoll_poll);
static void refill_skbs(void)
{
@@ -275,7 +268,7 @@ repeat:
if (!skb) {
if (++count < 10) {
- netpoll_poll(np);
+ netpoll_poll_dev(np->dev);
goto repeat;
}
return NULL;
@@ -336,7 +329,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
}
/* tickle device maybe there is some cleanup */
- netpoll_poll(np);
+ netpoll_poll_dev(np->dev);
udelay(USEC_PER_POLL);
}
@@ -792,6 +785,13 @@ int netpoll_setup(struct netpoll *np)
return -ENODEV;
}
+ if (ndev->master) {
+ printk(KERN_ERR "%s: %s is a slave device, aborting.\n",
+ np->name, np->dev_name);
+ err = -EBUSY;
+ goto put;
+ }
+
if (!netif_running(ndev)) {
unsigned long atmost, atleast;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d56cb9b0b94..99d9e953fe39 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -56,9 +56,11 @@
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
+ rtnl_calcit_func calcit;
};
static DEFINE_MUTEX(rtnl_mutex);
+static u16 min_ifinfo_dump_size;
void rtnl_lock(void)
{
@@ -144,12 +146,28 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
return tab ? tab[msgindex].dumpit : NULL;
}
+static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex)
+{
+ struct rtnl_link *tab;
+
+ if (protocol <= RTNL_FAMILY_MAX)
+ tab = rtnl_msg_handlers[protocol];
+ else
+ tab = NULL;
+
+ if (tab == NULL || tab[msgindex].calcit == NULL)
+ tab = rtnl_msg_handlers[PF_UNSPEC];
+
+ return tab ? tab[msgindex].calcit : NULL;
+}
+
/**
* __rtnl_register - Register a rtnetlink message type
* @protocol: Protocol family or PF_UNSPEC
* @msgtype: rtnetlink message type
* @doit: Function pointer called for each request message
* @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message
+ * @calcit: Function pointer to calc size of dump message
*
* Registers the specified function pointers (at least one of them has
* to be non-NULL) to be called whenever a request message for the
@@ -162,7 +180,8 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
* Returns 0 on success or a negative error code.
*/
int __rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ rtnl_calcit_func calcit)
{
struct rtnl_link *tab;
int msgindex;
@@ -185,6 +204,9 @@ int __rtnl_register(int protocol, int msgtype,
if (dumpit)
tab[msgindex].dumpit = dumpit;
+ if (calcit)
+ tab[msgindex].calcit = calcit;
+
return 0;
}
EXPORT_SYMBOL_GPL(__rtnl_register);
@@ -199,9 +221,10 @@ EXPORT_SYMBOL_GPL(__rtnl_register);
* of memory implies no sense in continuing.
*/
void rtnl_register(int protocol, int msgtype,
- rtnl_doit_func doit, rtnl_dumpit_func dumpit)
+ rtnl_doit_func doit, rtnl_dumpit_func dumpit,
+ rtnl_calcit_func calcit)
{
- if (__rtnl_register(protocol, msgtype, doit, dumpit) < 0)
+ if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0)
panic("Unable to register rtnetlink message handler, "
"protocol = %d, message type = %d\n",
protocol, msgtype);
@@ -1009,6 +1032,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
s_idx = cb->args[1];
rcu_read_lock();
+ cb->seq = net->dev_base_seq;
+
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &net->dev_index_head[h];
@@ -1020,6 +1045,8 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
NLM_F_MULTI) <= 0)
goto out;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
idx++;
}
@@ -1046,6 +1073,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_NET_NS_FD] = { .type = NLA_U32 },
[IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1094,6 +1122,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ else if (tb[IFLA_NET_NS_FD])
+ net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
@@ -1224,7 +1254,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
int send_addr_notify = 0;
int err;
- if (tb[IFLA_NET_NS_PID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
@@ -1815,6 +1845,11 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
return err;
}
+static u16 rtnl_calcit(struct sk_buff *skb)
+{
+ return min_ifinfo_dump_size;
+}
+
static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
int idx;
@@ -1844,11 +1879,14 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
+ size_t if_info_size;
- skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+ skb = nlmsg_new((if_info_size = if_nlmsg_size(dev)), GFP_KERNEL);
if (skb == NULL)
goto errout;
+ min_ifinfo_dump_size = max_t(u16, if_info_size, min_ifinfo_dump_size);
+
err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
@@ -1899,14 +1937,20 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
struct sock *rtnl;
rtnl_dumpit_func dumpit;
+ rtnl_calcit_func calcit;
+ u16 min_dump_alloc = 0;
dumpit = rtnl_get_dumpit(family, type);
if (dumpit == NULL)
return -EOPNOTSUPP;
+ calcit = rtnl_get_calcit(family, type);
+ if (calcit)
+ min_dump_alloc = calcit(skb);
__rtnl_unlock();
rtnl = net->rtnl;
- err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ err = netlink_dump_start(rtnl, skb, nlh, dumpit,
+ NULL, min_dump_alloc);
rtnl_lock();
return err;
}
@@ -2016,12 +2060,13 @@ void __init rtnetlink_init(void)
netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
register_netdevice_notifier(&rtnetlink_dev_notifier);
- rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
- rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
- rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
- rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink,
+ rtnl_dump_ifinfo, rtnl_calcit);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL);
- rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
- rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 46cbd28f40f9..a9577a2f3a43 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -329,6 +329,18 @@ static void skb_release_data(struct sk_buff *skb)
put_page(skb_shinfo(skb)->frags[i].page);
}
+ /*
+ * If skb buf is from userspace, we need to notify the caller
+ * the lower device DMA has done;
+ */
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ struct ubuf_info *uarg;
+
+ uarg = skb_shinfo(skb)->destructor_arg;
+ if (uarg->callback)
+ uarg->callback(uarg);
+ }
+
if (skb_has_frag_list(skb))
skb_drop_fraglist(skb);
@@ -481,6 +493,9 @@ bool skb_recycle_check(struct sk_buff *skb, int skb_size)
if (irqs_disabled())
return false;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)
+ return false;
+
if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE)
return false;
@@ -596,6 +611,51 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
}
EXPORT_SYMBOL_GPL(skb_morph);
+/* skb frags copy userspace buffers to kernel */
+static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
+{
+ int i;
+ int num_frags = skb_shinfo(skb)->nr_frags;
+ struct page *page, *head = NULL;
+ struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg;
+
+ for (i = 0; i < num_frags; i++) {
+ u8 *vaddr;
+ skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+
+ page = alloc_page(GFP_ATOMIC);
+ if (!page) {
+ while (head) {
+ struct page *next = (struct page *)head->private;
+ put_page(head);
+ head = next;
+ }
+ return -ENOMEM;
+ }
+ vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
+ memcpy(page_address(page),
+ vaddr + f->page_offset, f->size);
+ kunmap_skb_frag(vaddr);
+ page->private = (unsigned long)head;
+ head = page;
+ }
+
+ /* skb frags release userspace buffers */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ put_page(skb_shinfo(skb)->frags[i].page);
+
+ uarg->callback(uarg);
+
+ /* skb frags point to kernel buffers */
+ for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
+ skb_shinfo(skb)->frags[i - 1].page_offset = 0;
+ skb_shinfo(skb)->frags[i - 1].page = head;
+ head = (struct page *)head->private;
+ }
+ return 0;
+}
+
+
/**
* skb_clone - duplicate an sk_buff
* @skb: buffer to clone
@@ -614,6 +674,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
{
struct sk_buff *n;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, gfp_mask))
+ return NULL;
+ }
+
n = skb + 1;
if (skb->fclone == SKB_FCLONE_ORIG &&
n->fclone == SKB_FCLONE_UNAVAILABLE) {
@@ -731,6 +796,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_shinfo(skb)->nr_frags) {
int i;
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, gfp_mask)) {
+ kfree(n);
+ goto out;
+ }
+ }
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
get_page(skb_shinfo(n)->frags[i].page);
@@ -788,7 +859,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
fastpath = true;
else {
int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
-
fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
}
@@ -819,6 +889,11 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (fastpath) {
kfree(skb->head);
} else {
+ /* copy this zero copy skb frags */
+ if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+ if (skb_copy_ubufs(skb, gfp_mask))
+ goto nofrags;
+ }
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
get_page(skb_shinfo(skb)->frags[i].page);
@@ -853,6 +928,8 @@ adjust_others:
atomic_set(&skb_shinfo(skb)->dataref, 1);
return 0;
+nofrags:
+ kfree(data);
nodata:
return -ENOMEM;
}
@@ -1354,6 +1431,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
}
start = end;
}
+
if (!len)
return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 84d6de809352..bc745d00ea4d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -128,6 +128,8 @@
#include <linux/filter.h>
+#include <trace/events/sock.h>
+
#ifdef CONFIG_INET
#include <net/tcp.h>
#endif
@@ -292,6 +294,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
(unsigned)sk->sk_rcvbuf) {
atomic_inc(&sk->sk_drops);
+ trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
}
@@ -1736,6 +1739,8 @@ suppress_allocation:
return 1;
}
+ trace_sock_exceed_buf_limit(sk, prot, allocated);
+
/* Alas. Undo changes. */
sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
atomic_long_sub(amt, prot->memory_allocated);
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7e7ca375d431..98a52640e7cd 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -68,6 +68,7 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
break;
}
}
+EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps)
@@ -121,6 +122,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
return false;
}
+EXPORT_SYMBOL_GPL(skb_defer_rx_timestamp);
void __init skb_timestamping_init(void)
{