From 88d0895d0ea9d4431507d576c963f2ff9918144d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 15:08:44 +0200 Subject: batman-adv: Avoid probe ELP information leak The probe ELPs for WiFi interfaces are expanded to contain at least BATADV_ELP_MIN_PROBE_SIZE bytes. This is usually a lot more than the number of bytes which the template ELP packet requires. These extra padding bytes were not initialized and thus could contain data which were previously stored at the same location. It is therefore required to set it to some predefined or random values to avoid leaking private information from the system transmitting these kind of packets. Fixes: e4623c913508 ("batman-adv: Avoid probe ELP information leak") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 71c20c1d4002..e103c759b7ab 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -241,7 +241,7 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) * the packet to be exactly of that size to make the link * throughput estimation effective. */ - skb_put(skb, probe_len - hard_iface->bat_v.elp_skb->len); + skb_put_zero(skb, probe_len - hard_iface->bat_v.elp_skb->len); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Sending unicast (probe) ELP packet on interface %s to %pM\n", -- cgit v1.2.3-58-ga151 From b9fd14c20871e6189f635e49b32d7789e430b3c8 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 16:46:47 +0200 Subject: batman-adv: Fix segfault when writing to throughput_override The per hardif sysfs file "batman_adv/throughput_override" prints the resulting change as info text when the users writes to this file. It uses the helper function batadv_info to add it at the same time to the kernel ring buffer and to the batman-adv debug log (when CONFIG_BATMAN_ADV_DEBUG is enabled). The function batadv_info requires as first parameter the batman-adv softif net_device. This parameter is then used to find the private buffer which contains the debug log for this batman-adv interface. But batadv_store_throughput_override used as first argument the slave net_device. This slave device doesn't have the batadv_priv private data which is access by batadv_info. Writing to this file with CONFIG_BATMAN_ADV_DEBUG enabled can either lead to a segfault or to memory corruption. Fixes: 0b5ecc6811bd ("batman-adv: add throughput override attribute to hard_ifaces") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index f2eef43bd2ec..3a76e8970c02 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1090,8 +1090,9 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, if (old_tp_override == tp_override) goto out; - batadv_info(net_dev, "%s: Changing from: %u.%u MBit to: %u.%u MBit\n", - "throughput_override", + batadv_info(hard_iface->soft_iface, + "%s: %s: Changing from: %u.%u MBit to: %u.%u MBit\n", + "throughput_override", net_dev->name, old_tp_override / 10, old_tp_override % 10, tp_override / 10, tp_override % 10); -- cgit v1.2.3-58-ga151 From a25bab9d723a08bd0bdafb1529faf9094c690b70 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 31 Aug 2018 16:56:29 +0200 Subject: batman-adv: Fix segfault when writing to sysfs elp_interval The per hardif sysfs file "batman_adv/elp_interval" is using the generic functions to store/show uint values. The helper __batadv_store_uint_attr requires the softif net_device as parameter to print the resulting change as info text when the users writes to this file. It uses the helper function batadv_info to add it at the same time to the kernel ring buffer and to the batman-adv debug log (when CONFIG_BATMAN_ADV_DEBUG is enabled). The function batadv_info requires as first parameter the batman-adv softif net_device. This parameter is then used to find the private buffer which contains the debug log for this batman-adv interface. But batadv_store_throughput_override used as first argument the slave net_device. This slave device doesn't have the batadv_priv private data which is access by batadv_info. Writing to this file with CONFIG_BATMAN_ADV_DEBUG enabled can either lead to a segfault or to memory corruption. Fixes: 0744ff8fa8fa ("batman-adv: Add hard_iface specific sysfs wrapper macros for UINT") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/sysfs.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 3a76e8970c02..09427fc6494a 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -188,7 +188,8 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ \ return __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ - &bat_priv->_var, net_dev); \ + &bat_priv->_var, net_dev, \ + NULL); \ } #define BATADV_ATTR_SIF_SHOW_UINT(_name, _var) \ @@ -262,7 +263,9 @@ ssize_t batadv_store_##_name(struct kobject *kobj, \ \ length = __batadv_store_uint_attr(buff, count, _min, _max, \ _post_func, attr, \ - &hard_iface->_var, net_dev); \ + &hard_iface->_var, \ + hard_iface->soft_iface, \ + net_dev); \ \ batadv_hardif_put(hard_iface); \ return length; \ @@ -356,10 +359,12 @@ __batadv_store_bool_attr(char *buff, size_t count, static int batadv_store_uint_attr(const char *buff, size_t count, struct net_device *net_dev, + struct net_device *slave_dev, const char *attr_name, unsigned int min, unsigned int max, atomic_t *attr) { + char ifname[IFNAMSIZ + 3] = ""; unsigned long uint_val; int ret; @@ -385,8 +390,11 @@ static int batadv_store_uint_attr(const char *buff, size_t count, if (atomic_read(attr) == uint_val) return count; - batadv_info(net_dev, "%s: Changing from: %i to: %lu\n", - attr_name, atomic_read(attr), uint_val); + if (slave_dev) + snprintf(ifname, sizeof(ifname), "%s: ", slave_dev->name); + + batadv_info(net_dev, "%s: %sChanging from: %i to: %lu\n", + attr_name, ifname, atomic_read(attr), uint_val); atomic_set(attr, uint_val); return count; @@ -397,12 +405,13 @@ static ssize_t __batadv_store_uint_attr(const char *buff, size_t count, void (*post_func)(struct net_device *), const struct attribute *attr, atomic_t *attr_store, - struct net_device *net_dev) + struct net_device *net_dev, + struct net_device *slave_dev) { int ret; - ret = batadv_store_uint_attr(buff, count, net_dev, attr->name, min, max, - attr_store); + ret = batadv_store_uint_attr(buff, count, net_dev, slave_dev, + attr->name, min, max, attr_store); if (post_func && ret) post_func(net_dev); @@ -571,7 +580,7 @@ static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect, attr, &bat_priv->gw.sel_class, - bat_priv->soft_iface); + bat_priv->soft_iface, NULL); } static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, -- cgit v1.2.3-58-ga151 From dff9bc42ab0b2d38c5e90ddd79b238fed5b4c7ad Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:41 +0200 Subject: batman-adv: Prevent duplicated gateway_node entry The function batadv_gw_node_add is responsible for adding new gw_node to the gateway_list. It is expecting that the caller already checked that there is not already an entry with the same key or not. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/gateway_client.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 8b198ee798c9..140c61a3f1ec 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -348,6 +349,9 @@ out: * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * @gateway: announced bandwidth information + * + * Has to be called with the appropriate locks being acquired + * (gw.list_lock). */ static void batadv_gw_node_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@ -355,6 +359,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node; + lockdep_assert_held(&bat_priv->gw.list_lock); + if (gateway->bandwidth_down == 0) return; @@ -369,10 +375,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - spin_lock_bh(&bat_priv->gw.list_lock); kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.gateway_list); - spin_unlock_bh(&bat_priv->gw.list_lock); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Found new gateway %pM -> gw bandwidth: %u.%u/%u.%u MBit\n", @@ -428,11 +432,14 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, { struct batadv_gw_node *gw_node, *curr_gw = NULL; + spin_lock_bh(&bat_priv->gw.list_lock); gw_node = batadv_gw_node_get(bat_priv, orig_node); if (!gw_node) { batadv_gw_node_add(bat_priv, orig_node, gateway); + spin_unlock_bh(&bat_priv->gw.list_lock); goto out; } + spin_unlock_bh(&bat_priv->gw.list_lock); if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) && gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)) -- cgit v1.2.3-58-ga151 From fa122fec8640eb7186ce5a41b83a4c1744ceef8f Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:42 +0200 Subject: batman-adv: Prevent duplicated nc_node entry The function batadv_nc_get_nc_node is responsible for adding new nc_nodes to the in_coding_list and out_coding_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: d56b1705e28c ("batman-adv: network coding - detect coding nodes and remove these after timeout") Signed-off-by: Sven Eckelmann Acked-by: Marek Lindner Signed-off-by: Simon Wunderlich --- net/batman-adv/network-coding.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index c3578444f3cb..34caf129a9bf 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -854,16 +854,27 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ struct list_head *list; + /* Select ingoing or outgoing coding node */ + if (in_coding) { + lock = &orig_neigh_node->in_coding_list_lock; + list = &orig_neigh_node->in_coding_list; + } else { + lock = &orig_neigh_node->out_coding_list_lock; + list = &orig_neigh_node->out_coding_list; + } + + spin_lock_bh(lock); + /* Check if nc_node is already added */ nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); /* Node found */ if (nc_node) - return nc_node; + goto unlock; nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); if (!nc_node) - return NULL; + goto unlock; /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); @@ -872,22 +883,14 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, kref_get(&orig_neigh_node->refcount); nc_node->orig_node = orig_neigh_node; - /* Select ingoing or outgoing coding node */ - if (in_coding) { - lock = &orig_neigh_node->in_coding_list_lock; - list = &orig_neigh_node->in_coding_list; - } else { - lock = &orig_neigh_node->out_coding_list_lock; - list = &orig_neigh_node->out_coding_list; - } - batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", nc_node->addr, nc_node->orig_node->orig); /* Add nc_node to orig_node */ - spin_lock_bh(lock); kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); + +unlock: spin_unlock_bh(lock); return nc_node; -- cgit v1.2.3-58-ga151 From 94cb82f594ed86be303398d6dfc7640a6f1d45d4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:43 +0200 Subject: batman-adv: Prevent duplicated softif_vlan entry The function batadv_softif_vlan_get is responsible for adding new softif_vlan to the softif_vlan_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: 5d2c05b21337 ("batman-adv: add per VLAN interface attribute framework") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 1485263a348b..626ddca332db 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -574,15 +574,20 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) struct batadv_softif_vlan *vlan; int err; + spin_lock_bh(&bat_priv->softif_vlan_list_lock); + vlan = batadv_softif_vlan_get(bat_priv, vid); if (vlan) { batadv_softif_vlan_put(vlan); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -EEXIST; } vlan = kzalloc(sizeof(*vlan), GFP_ATOMIC); - if (!vlan) + if (!vlan) { + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return -ENOMEM; + } vlan->bat_priv = bat_priv; vlan->vid = vid; @@ -590,17 +595,23 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) atomic_set(&vlan->ap_isolation, 0); + kref_get(&vlan->refcount); + hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); + + /* batadv_sysfs_add_vlan cannot be in the spinlock section due to the + * sleeping behavior of the sysfs functions and the fs_reclaim lock + */ err = batadv_sysfs_add_vlan(bat_priv->soft_iface, vlan); if (err) { - kfree(vlan); + /* ref for the function */ + batadv_softif_vlan_put(vlan); + + /* ref for the list */ + batadv_softif_vlan_put(vlan); return err; } - spin_lock_bh(&bat_priv->softif_vlan_list_lock); - kref_get(&vlan->refcount); - hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); - spin_unlock_bh(&bat_priv->softif_vlan_list_lock); - /* add a new TT local entry. This one will be marked with the NOPURGE * flag */ -- cgit v1.2.3-58-ga151 From e7136e48ffdfb9f37b0820f619380485eb407361 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:44 +0200 Subject: batman-adv: Prevent duplicated global TT entry The function batadv_tt_global_orig_entry_add is responsible for adding new tt_orig_list_entry to the orig_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: d657e621a0f5 ("batman-adv: add reference counting for type batadv_tt_orig_list_entry") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 12a2b7d21376..d21624c44665 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1613,6 +1613,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, { struct batadv_tt_orig_list_entry *orig_entry; + spin_lock_bh(&tt_global->list_lock); + orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); if (orig_entry) { /* refresh the ttvn: the current value could be a bogus one that @@ -1635,11 +1637,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->flags = flags; kref_init(&orig_entry->refcount); - spin_lock_bh(&tt_global->list_lock); kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); - spin_unlock_bh(&tt_global->list_lock); atomic_inc(&tt_global->orig_list_count); sync_flags: @@ -1647,6 +1647,8 @@ sync_flags: out: if (orig_entry) batadv_tt_orig_list_entry_put(orig_entry); + + spin_unlock_bh(&tt_global->list_lock); } /** -- cgit v1.2.3-58-ga151 From ae3cdc97dc10c7a3b31f297dab429bfb774c9ccb Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 12 Aug 2018 21:04:45 +0200 Subject: batman-adv: Prevent duplicated tvlv handler The function batadv_tvlv_handler_register is responsible for adding new tvlv_handler to the handler_list. It first checks whether the entry already is in the list or not. If it is, then the creation of a new entry is aborted. But the lock for the list is only held when the list is really modified. This could lead to duplicated entries because another context could create an entry with the same key between the check and the list manipulation. The check and the manipulation of the list must therefore be in the same locked code section. Fixes: ef26157747d4 ("batman-adv: tvlv - basic infrastructure") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/tvlv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index a637458205d1..40e69c9346d2 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -529,15 +529,20 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, { struct batadv_tvlv_handler *tvlv_handler; + spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + tvlv_handler = batadv_tvlv_handler_get(bat_priv, type, version); if (tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); batadv_tvlv_handler_put(tvlv_handler); return; } tvlv_handler = kzalloc(sizeof(*tvlv_handler), GFP_ATOMIC); - if (!tvlv_handler) + if (!tvlv_handler) { + spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); return; + } tvlv_handler->ogm_handler = optr; tvlv_handler->unicast_handler = uptr; @@ -547,7 +552,6 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, kref_init(&tvlv_handler->refcount); INIT_HLIST_NODE(&tvlv_handler->list); - spin_lock_bh(&bat_priv->tvlv.handler_list_lock); kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); -- cgit v1.2.3-58-ga151 From 5af96b9c59c72fb2af2d19c5cc2f3cdcee391dff Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 7 Sep 2018 05:45:54 +0800 Subject: batman-adv: fix backbone_gw refcount on queue_work() failure The backbone_gw refcounter is to be decreased by the queued work and currently is never decreased if the queue_work() call fails. Fix by checking the queue_work() return value and decrease refcount if necessary. Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bridge_loop_avoidance.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ff9659af6b91..5f1aeeded0e3 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1772,6 +1772,7 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, { struct batadv_bla_backbone_gw *backbone_gw; struct ethhdr *ethhdr; + bool ret; ethhdr = eth_hdr(skb); @@ -1795,8 +1796,13 @@ batadv_bla_loopdetect_check(struct batadv_priv *bat_priv, struct sk_buff *skb, if (unlikely(!backbone_gw)) return true; - queue_work(batadv_event_workqueue, &backbone_gw->report_work); - /* backbone_gw is unreferenced in the report work function function */ + ret = queue_work(batadv_event_workqueue, &backbone_gw->report_work); + + /* backbone_gw is unreferenced in the report work function function + * if queue_work() call was successful + */ + if (!ret) + batadv_backbone_gw_put(backbone_gw); return true; } -- cgit v1.2.3-58-ga151 From 4c4af6900844ab04c9434c972021d7b48610e06a Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 7 Sep 2018 05:45:55 +0800 Subject: batman-adv: fix hardif_neigh refcount on queue_work() failure The hardif_neigh refcounter is to be decreased by the queued work and currently is never decreased if the queue_work() call fails. Fix by checking the queue_work() return value and decrease refcount if necessary. Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/bat_v_elp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index e103c759b7ab..9f481cfdf77d 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -268,6 +268,7 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; + bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -329,8 +330,11 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) * may sleep and that is not allowed in an rcu protected * context. Therefore schedule a task for that. */ - queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); + ret = queue_work(batadv_event_workqueue, + &hardif_neigh->bat_v.metric_work); + + if (!ret) + batadv_hardif_neigh_put(hardif_neigh); } rcu_read_unlock(); -- cgit v1.2.3-58-ga151 From dabeb13eee81329338b1f8f330dfcc37a86714d7 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 14 Sep 2018 10:47:14 +0200 Subject: batman-adv: Increase version number to 2018.3 Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 8da3c9336111..3ccc75ee719c 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -25,7 +25,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2018.2" +#define BATADV_SOURCE_VERSION "2018.3" #endif /* B.A.T.M.A.N. parameters */ -- cgit v1.2.3-58-ga151 From 674d9de02aa7d521ebdf66c3958758bdd9c64e11 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 17 Sep 2018 15:51:40 +0200 Subject: NFC: Fix possible memory corruption when handling SHDLC I-Frame commands When handling SHDLC I-Frame commands "pipe" field used for indexing into an array should be checked before usage. If left unchecked it might access memory outside of the array of size NFC_HCI_MAX_PIPES(127). Malformed NFC HCI frames could be injected by a malicious NFC device communicating with the device being attacked (remote attack vector), or even by an attacker with physical access to the I2C bus such that they could influence the data transfers on that bus (local attack vector). skb->data is controlled by the attacker and has only been sanitized in the most trivial ways (CRC check), therefore we can consider the create_info struct and all of its members to tainted. 'create_info->pipe' with max value of 255 (uint8) is used to take an offset of the hdev->pipes array of 127 elements which can lead to OOB write. Cc: Samuel Ortiz Cc: Allen Pais Cc: "David S. Miller" Suggested-by: Kevin Deus Signed-off-by: Suren Baghdasaryan Acked-by: Kees Cook Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- net/nfc/hci/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ac8030c4bcf8..19cb2e473ea6 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -209,6 +209,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, } create_info = (struct hci_create_pipe_resp *)skb->data; + if (create_info->pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + /* Save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id * but since we received this cmd from host controller, we @@ -232,6 +237,11 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, } delete_info = (struct hci_delete_pipe_noti *)skb->data; + if (delete_info->pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + hdev->pipes[delete_info->pipe].gate = NFC_HCI_INVALID_GATE; hdev->pipes[delete_info->pipe].dest_host = NFC_HCI_INVALID_HOST; break; -- cgit v1.2.3-58-ga151 From 648a5a7aed346c3b8fe7c32a835edfb0dfbf4451 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 18 Sep 2018 15:46:34 +0200 Subject: net/smc: fix non-blocking connect problem In state SMC_INIT smc_poll() delegates polling to the internal CLC socket. This means, once the connect worker has finished its kernel_connect() step, the poll wake-up may occur. This is not intended. The wake-up should occur from the wake up call in smc_connect_work() after __smc_connect() has finished. Thus in state SMC_INIT this patch now calls sock_poll_wait() on the main SMC socket. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2d8a1e15e4f9..9c3976bcde46 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -742,7 +742,10 @@ static void smc_connect_work(struct work_struct *work) smc->sk.sk_err = -rc; out: - smc->sk.sk_state_change(&smc->sk); + if (smc->sk.sk_err) + smc->sk.sk_state_change(&smc->sk); + else + smc->sk.sk_write_space(&smc->sk); kfree(smc->connect_info); smc->connect_info = NULL; release_sock(&smc->sk); @@ -1529,7 +1532,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, return EPOLLNVAL; smc = smc_sk(sock->sk); - if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { + if (smc->use_fallback) { /* delegate to CLC child sock */ mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); sk->sk_err = smc->clcsock->sk->sk_err; -- cgit v1.2.3-58-ga151 From 1ca52fcfaca43665d525645348801a6f4a4b9e9a Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 18 Sep 2018 15:46:35 +0200 Subject: net/smc: remove duplicate mutex_unlock For a failing smc_listen_rdma_finish() smc_listen_decline() is called. If fallback is possible, the new socket is already enqueued to be accepted in smc_listen_decline(). Avoid enqueuing a second time afterwards in this case, otherwise the smc_create_lgr_pending lock is released twice: [ 373.463976] WARNING: bad unlock balance detected! [ 373.463978] 4.18.0-rc7+ #123 Tainted: G O [ 373.463979] ------------------------------------- [ 373.463980] kworker/1:1/30 is trying to release lock (smc_create_lgr_pending) at: [ 373.463990] [<000003ff801205fc>] smc_listen_work+0x22c/0x5d0 [smc] [ 373.463991] but there are no more locks to release! [ 373.463991] other info that might help us debug this: [ 373.463993] 2 locks held by kworker/1:1/30: [ 373.463994] #0: 00000000772cbaed ((wq_completion)"events"){+.+.}, at: process_one_work+0x1ec/0x6b0 [ 373.464000] #1: 000000003ad0894a ((work_completion)(&new_smc->smc_listen_work)){+.+.}, at: process_one_work+0x1ec/0x6b0 [ 373.464003] stack backtrace: [ 373.464005] CPU: 1 PID: 30 Comm: kworker/1:1 Kdump: loaded Tainted: G O 4.18.0-rc7uschi+ #123 [ 373.464007] Hardware name: IBM 2827 H43 738 (LPAR) [ 373.464010] Workqueue: events smc_listen_work [smc] [ 373.464011] Call Trace: [ 373.464015] ([<0000000000114100>] show_stack+0x60/0xd8) [ 373.464019] [<0000000000a8c9bc>] dump_stack+0x9c/0xd8 [ 373.464021] [<00000000001dcaf8>] print_unlock_imbalance_bug+0xf8/0x108 [ 373.464022] [<00000000001e045c>] lock_release+0x114/0x4f8 [ 373.464025] [<0000000000aa87fa>] __mutex_unlock_slowpath+0x4a/0x300 [ 373.464027] [<000003ff801205fc>] smc_listen_work+0x22c/0x5d0 [smc] [ 373.464029] [<0000000000197a68>] process_one_work+0x2a8/0x6b0 [ 373.464030] [<0000000000197ec2>] worker_thread+0x52/0x410 [ 373.464033] [<000000000019fd0e>] kthread+0x15e/0x178 [ 373.464035] [<0000000000aaf58a>] kernel_thread_starter+0x6/0xc [ 373.464052] [<0000000000aaf584>] kernel_thread_starter+0x0/0xc [ 373.464054] INFO: lockdep is turned off. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9c3976bcde46..5c6d30eb4a71 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1153,9 +1153,9 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) } /* listen worker: finish RDMA setup */ -static void smc_listen_rdma_finish(struct smc_sock *new_smc, - struct smc_clc_msg_accept_confirm *cclc, - int local_contact) +static int smc_listen_rdma_finish(struct smc_sock *new_smc, + struct smc_clc_msg_accept_confirm *cclc, + int local_contact) { struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; int reason_code = 0; @@ -1178,11 +1178,12 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc, if (reason_code) goto decline; } - return; + return 0; decline: mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); + return reason_code; } /* setup for RDMA connection of server */ @@ -1279,8 +1280,10 @@ static void smc_listen_work(struct work_struct *work) } /* finish worker */ - if (!ism_supported) - smc_listen_rdma_finish(new_smc, &cclc, local_contact); + if (!ism_supported) { + if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) + return; + } smc_conn_save_peer_info(new_smc, &cclc); mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); -- cgit v1.2.3-58-ga151 From dd65d87a6abd537ae9bb3bdcee8905704b936884 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 18 Sep 2018 15:46:36 +0200 Subject: net/smc: enable fallback for connection abort in state INIT If a linkgroup is terminated abnormally already due to failing LLC CONFIRM LINK or LLC ADD LINK, fallback to TCP is still possible. In this case do not switch to state SMC_PEERABORTWAIT and do not set sk_err. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_close.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index ac961dfb1ea1..ea2b87f29469 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -100,15 +100,14 @@ static void smc_close_active_abort(struct smc_sock *smc) struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - sk->sk_err = ECONNABORTED; - if (smc->clcsock && smc->clcsock->sk) { - smc->clcsock->sk->sk_err = ECONNABORTED; - smc->clcsock->sk->sk_state_change(smc->clcsock->sk); + if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { + sk->sk_err = ECONNABORTED; + if (smc->clcsock && smc->clcsock->sk) { + smc->clcsock->sk->sk_err = ECONNABORTED; + smc->clcsock->sk->sk_state_change(smc->clcsock->sk); + } } switch (sk->sk_state) { - case SMC_INIT: - sk->sk_state = SMC_PEERABORTWAIT; - break; case SMC_ACTIVE: sk->sk_state = SMC_PEERABORTWAIT; release_sock(sk); @@ -143,6 +142,7 @@ static void smc_close_active_abort(struct smc_sock *smc) case SMC_PEERFINCLOSEWAIT: sock_put(sk); /* passive closing */ break; + case SMC_INIT: case SMC_PEERABORTWAIT: case SMC_CLOSED: break; -- cgit v1.2.3-58-ga151 From 71d117f527425e2d6a5029e8365d82a8d2d6916a Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 18 Sep 2018 15:46:37 +0200 Subject: net/smc: no urgent data check for listen sockets Don't check a listen socket for pending urgent data in smc_poll(). Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5c6d30eb4a71..015231789ed2 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1566,9 +1566,9 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; if (sk->sk_state == SMC_APPCLOSEWAIT1) mask |= EPOLLIN; + if (smc->conn.urg_state == SMC_URG_VALID) + mask |= EPOLLPRI; } - if (smc->conn.urg_state == SMC_URG_VALID) - mask |= EPOLLPRI; } return mask; -- cgit v1.2.3-58-ga151 From 381897798a94065ffcad0772eecdc6b04a7ff23d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 18 Sep 2018 15:46:38 +0200 Subject: net/smc: fix sizeof to int comparison Comparing an int to a size, which is unsigned, causes the int to become unsigned, giving the wrong result. kernel_sendmsg can return a negative error code. Signed-off-by: YueHaibing Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 83aba9ade060..52241d679cc9 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -446,14 +446,12 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, vec[i++].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); - if (len < sizeof(pclc)) { - if (len >= 0) { - reason_code = -ENETUNREACH; - smc->sk.sk_err = -reason_code; - } else { - smc->sk.sk_err = smc->clcsock->sk->sk_err; - reason_code = -smc->sk.sk_err; - } + if (len < 0) { + smc->sk.sk_err = smc->clcsock->sk->sk_err; + reason_code = -smc->sk.sk_err; + } else if (len < (int)sizeof(pclc)) { + reason_code = -ENETUNREACH; + smc->sk.sk_err = -reason_code; } return reason_code; -- cgit v1.2.3-58-ga151 From 8675860592321a43bbb0b5aedc244a9b2321edc3 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 18 Sep 2018 13:44:59 -0700 Subject: Revert "ipv6: fix double refcount of fib6_metrics" This reverts commit e70a3aad44cc8b24986687ffc98c4a4f6ecf25ea. This change causes use-after-free on dst->_metrics. The crash trace looks like this: [ 97.763269] BUG: KASAN: use-after-free in ip6_mtu+0x116/0x140 [ 97.769038] Read of size 4 at addr ffff881781d2cf84 by task svw_NetThreadEv/8801 [ 97.777954] CPU: 76 PID: 8801 Comm: svw_NetThreadEv Not tainted 4.15.0-smp-DEV #11 [ 97.777956] Hardware name: Default string Default string/Indus_QC_02, BIOS 5.46.4 03/29/2018 [ 97.777957] Call Trace: [ 97.777971] [] dump_stack+0x4d/0x72 [ 97.777985] [] print_address_description+0x6f/0x260 [ 97.777997] [] kasan_report+0x257/0x370 [ 97.778001] [] ? ip6_mtu+0x116/0x140 [ 97.778004] [] __asan_report_load4_noabort+0x19/0x20 [ 97.778008] [] ip6_mtu+0x116/0x140 [ 97.778013] [] tcp_current_mss+0x12e/0x280 [ 97.778016] [] ? tcp_mtu_to_mss+0x2d0/0x2d0 [ 97.778022] [] ? depot_save_stack+0x138/0x4a0 [ 97.778037] [] ? __mmdrop+0x145/0x1f0 [ 97.778040] [] ? save_stack+0xb1/0xd0 [ 97.778046] [] tcp_send_mss+0x22/0x220 [ 97.778059] [] tcp_sendmsg_locked+0x4f9/0x39f0 [ 97.778062] [] ? kasan_check_write+0x14/0x20 [ 97.778066] [] ? tcp_sendpage+0x60/0x60 [ 97.778070] [] ? rw_copy_check_uvector+0x69/0x280 [ 97.778075] [] ? import_iovec+0x9f/0x430 [ 97.778078] [] ? kasan_slab_free+0x87/0xc0 [ 97.778082] [] ? memzero_page+0x140/0x140 [ 97.778085] [] ? kasan_check_write+0x14/0x20 [ 97.778088] [] tcp_sendmsg+0x2c/0x50 [ 97.778092] [] ? tcp_sendmsg+0x2c/0x50 [ 97.778098] [] inet_sendmsg+0x103/0x480 [ 97.778102] [] ? inet_gso_segment+0x15b0/0x15b0 [ 97.778105] [] sock_sendmsg+0xba/0xf0 [ 97.778108] [] ___sys_sendmsg+0x6ca/0x8e0 [ 97.778113] [] ? hrtimer_try_to_cancel+0x71/0x3b0 [ 97.778116] [] ? copy_msghdr_from_user+0x3d0/0x3d0 [ 97.778119] [] ? memset+0x31/0x40 [ 97.778123] [] ? schedule_hrtimeout_range_clock+0x165/0x380 [ 97.778127] [] ? hrtimer_nanosleep_restart+0x250/0x250 [ 97.778130] [] ? __hrtimer_init+0x180/0x180 [ 97.778133] [] ? ktime_get_ts64+0x172/0x200 [ 97.778137] [] ? __fget_light+0x8c/0x2f0 [ 97.778141] [] __sys_sendmsg+0xe6/0x190 [ 97.778144] [] ? __sys_sendmsg+0xe6/0x190 [ 97.778147] [] ? SyS_shutdown+0x20/0x20 [ 97.778152] [] ? wake_up_q+0xe0/0xe0 [ 97.778155] [] ? __sys_sendmsg+0x190/0x190 [ 97.778158] [] SyS_sendmsg+0x13/0x20 [ 97.778162] [] do_syscall_64+0x2ac/0x430 [ 97.778166] [] ? do_page_fault+0x35/0x3d0 [ 97.778171] [] ? page_fault+0x2f/0x50 [ 97.778174] [] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 97.778177] RIP: 0033:0x7f83fa36000d [ 97.778178] RSP: 002b:00007f83ef9229e0 EFLAGS: 00000293 ORIG_RAX: 000000000000002e [ 97.778180] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f83fa36000d [ 97.778182] RDX: 0000000000004000 RSI: 00007f83ef922f00 RDI: 0000000000000036 [ 97.778183] RBP: 00007f83ef923040 R08: 00007f83ef9231f8 R09: 00007f83ef923168 [ 97.778184] R10: 0000000000000000 R11: 0000000000000293 R12: 00007f83f69c5b40 [ 97.778185] R13: 000000000000001c R14: 0000000000000001 R15: 0000000000004000 [ 97.779684] Allocated by task 5919: [ 97.783185] save_stack+0x46/0xd0 [ 97.783187] kasan_kmalloc+0xad/0xe0 [ 97.783189] kmem_cache_alloc_trace+0xdf/0x580 [ 97.783190] ip6_convert_metrics.isra.79+0x7e/0x190 [ 97.783192] ip6_route_info_create+0x60a/0x2480 [ 97.783193] ip6_route_add+0x1d/0x80 [ 97.783195] inet6_rtm_newroute+0xdd/0xf0 [ 97.783198] rtnetlink_rcv_msg+0x641/0xb10 [ 97.783200] netlink_rcv_skb+0x27b/0x3e0 [ 97.783202] rtnetlink_rcv+0x15/0x20 [ 97.783203] netlink_unicast+0x4be/0x720 [ 97.783204] netlink_sendmsg+0x7bc/0xbf0 [ 97.783205] sock_sendmsg+0xba/0xf0 [ 97.783207] ___sys_sendmsg+0x6ca/0x8e0 [ 97.783208] __sys_sendmsg+0xe6/0x190 [ 97.783209] SyS_sendmsg+0x13/0x20 [ 97.783211] do_syscall_64+0x2ac/0x430 [ 97.783213] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 97.784709] Freed by task 0: [ 97.785056] knetbase: Error: /proc/sys/net/core/txcs_enable does not exist [ 97.794497] save_stack+0x46/0xd0 [ 97.794499] kasan_slab_free+0x71/0xc0 [ 97.794500] kfree+0x7c/0xf0 [ 97.794501] fib6_info_destroy_rcu+0x24f/0x310 [ 97.794504] rcu_process_callbacks+0x38b/0x1730 [ 97.794506] __do_softirq+0x1c8/0x5d0 Reported-by: John Sperbeck Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 480a79f47c52..b5d3e6b294ab 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -976,6 +976,10 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) rt->rt6i_flags &= ~RTF_EXPIRES; rcu_assign_pointer(rt->from, from); dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); + if (from->fib6_metrics != &dst_default_metrics) { + rt->dst._metrics |= DST_METRICS_REFCOUNTED; + refcount_inc(&from->fib6_metrics->refcnt); + } } /* Caller must already hold reference to @ort */ -- cgit v1.2.3-58-ga151 From ce7ea4af0838ffd4667ecad4eb5eec7a25342f1e Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 18 Sep 2018 13:45:00 -0700 Subject: ipv6: fix memory leak on dst->_metrics When dst->_metrics and f6i->fib6_metrics share the same memory, both take reference count on the dst_metrics structure. However, when dst is destroyed, ip6_dst_destroy() only invokes dst_destroy_metrics_generic() which does not take care of READONLY metrics and does not release refcnt. This causes memory leak. Similar to ipv4 logic, the fix is to properly release refcnt and free the memory space pointed by dst->_metrics if refcnt becomes 0. Fixes: 93531c674315 ("net/ipv6: separate handling of FIB entries from dst based routes") Reported-by: Sabrina Dubroca Signed-off-by: Wei Wang Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b5d3e6b294ab..826b14de7dbb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -364,11 +364,14 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { + struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rt6_info *rt = (struct rt6_info *)dst; struct fib6_info *from; struct inet6_dev *idev; - dst_destroy_metrics_generic(dst); + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) + kfree(p); + rt6_uncached_list_del(rt); idev = rt->rt6i_idev; -- cgit v1.2.3-58-ga151 From 76c0ddd8c3a683f6e2c6e60e11dc1a1558caf4bc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 19 Sep 2018 15:02:07 +0200 Subject: ip6_tunnel: be careful when accessing the inner header the ip6 tunnel xmit ndo assumes that the processed skb always contains an ip[v6] header, but syzbot has found a way to send frames that fall short of this assumption, leading to the following splat: BUG: KMSAN: uninit-value in ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] BUG: KMSAN: uninit-value in ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 CPU: 0 PID: 4504 Comm: syz-executor558 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 ip6ip6_tnl_xmit net/ipv6/ip6_tunnel.c:1307 [inline] ip6_tnl_start_xmit+0x7d2/0x1ef0 net/ipv6/ip6_tunnel.c:1390 __netdev_start_xmit include/linux/netdevice.h:4066 [inline] netdev_start_xmit include/linux/netdevice.h:4075 [inline] xmit_one net/core/dev.c:3026 [inline] dev_hard_start_xmit+0x5f1/0xc70 net/core/dev.c:3042 __dev_queue_xmit+0x27ee/0x3520 net/core/dev.c:3557 dev_queue_xmit+0x4b/0x60 net/core/dev.c:3590 packet_snd net/packet/af_packet.c:2944 [inline] packet_sendmsg+0x7c70/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x441819 RSP: 002b:00007ffe58ee8268 EFLAGS: 00000213 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000441819 RDX: 0000000000000002 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cd018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000213 R12: 0000000000402510 R13: 00000000004025a0 R14: 0000000000000000 R15: 0000000000000000 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] alloc_skb_with_frags+0x1d4/0xb20 net/core/skbuff.c:5234 sock_alloc_send_pskb+0xb56/0x1190 net/core/sock.c:2085 packet_alloc_skb net/packet/af_packet.c:2803 [inline] packet_snd net/packet/af_packet.c:2894 [inline] packet_sendmsg+0x6454/0x8a30 net/packet/af_packet.c:2969 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmmsg+0x42d/0x800 net/socket.c:2136 SYSC_sendmmsg+0xc4/0x110 net/socket.c:2167 SyS_sendmmsg+0x63/0x90 net/socket.c:2162 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 This change addresses the issue adding the needed check before accessing the inner header. The ipv4 side of the issue is apparently there since the ipv4 over ipv6 initial support, and the ipv6 side predates git history. Fixes: c4d3efafcc93 ("[IPV6] IP6TUNNEL: Add support to IPv4 over IPv6 tunnel.") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+3fde91d4d394747d6db4@syzkaller.appspotmail.com Tested-by: Alexander Potapenko Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 419960b0ba16..a0b6932c3afd 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1234,7 +1234,7 @@ static inline int ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; int encap_limit = -1; struct flowi6 fl6; __u8 dsfield; @@ -1242,6 +1242,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + /* ensure we can access the full inner ip header */ + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return -1; + + iph = ip_hdr(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); tproto = READ_ONCE(t->parms.proto); @@ -1306,7 +1311,7 @@ static inline int ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h; int encap_limit = -1; __u16 offset; struct flowi6 fl6; @@ -1315,6 +1320,10 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; + if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) + return -1; + + ipv6h = ipv6_hdr(skb); tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) -- cgit v1.2.3-58-ga151 From 56ce3c5a50f4d8cc95361b1ec7f152006c6320d8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 20 Sep 2018 09:27:30 +0200 Subject: smc: generic netlink family should be __ro_after_init The generic netlink family is only initialized during module init, so it should be __ro_after_init like all other generic netlink families. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/smc/smc_pnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 01c6ce042a1c..7cb3e4f07c10 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -461,7 +461,7 @@ static const struct genl_ops smc_pnet_ops[] = { }; /* SMC_PNETID family definition */ -static struct genl_family smc_pnet_nl_family = { +static struct genl_family smc_pnet_nl_family __ro_after_init = { .hdrsize = 0, .name = SMCR_GENL_FAMILY_NAME, .version = SMCR_GENL_FAMILY_VERSION, -- cgit v1.2.3-58-ga151 From d7ab5cdce54da631f0c8c11e506c974536a3581e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 20 Sep 2018 17:27:28 +0800 Subject: sctp: update dst pmtu with the correct daddr When processing pmtu update from an icmp packet, it calls .update_pmtu with sk instead of skb in sctp_transport_update_pmtu. However for sctp, the daddr in the transport might be different from inet_sock->inet_daddr or sk->sk_v6_daddr, which is used to update or create the route cache. The incorrect daddr will cause a different route cache created for the path. So before calling .update_pmtu, inet_sock->inet_daddr/sk->sk_v6_daddr should be updated with the daddr in the transport, and update it back after it's done. The issue has existed since route exceptions introduction. Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.") Reported-by: ian.periam@dialogic.com Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/transport.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 12cac85da994..033696e6f74f 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -260,6 +260,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) { struct dst_entry *dst = sctp_transport_dst_check(t); + struct sock *sk = t->asoc->base.sk; bool change = true; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { @@ -271,12 +272,19 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pmtu = SCTP_TRUNC4(pmtu); if (dst) { - dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu); + struct sctp_pf *pf = sctp_get_pf_specific(dst->ops->family); + union sctp_addr addr; + + pf->af->from_sk(&addr, sk); + pf->to_sk_daddr(&t->ipaddr, sk); + dst->ops->update_pmtu(dst, sk, NULL, pmtu); + pf->to_sk_daddr(&addr, sk); + dst = sctp_transport_dst_check(t); } if (!dst) { - t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk); + t->af_specific->get_dst(t, &t->saddr, &t->fl, sk); dst = t->dst; } -- cgit v1.2.3-58-ga151 From f88b4c01b97e09535505cf3c327fdbce55c27f00 Mon Sep 17 00:00:00 2001 From: Sean Tranchetti Date: Thu, 20 Sep 2018 14:29:45 -0600 Subject: netlabel: check for IPV4MASK in addrinfo_get netlbl_unlabel_addrinfo_get() assumes that if it finds the NLBL_UNLABEL_A_IPV4ADDR attribute, it must also have the NLBL_UNLABEL_A_IPV4MASK attribute as well. However, this is not necessarily the case as the current checks in netlbl_unlabel_staticadd() and friends are not sufficent to enforce this. If passed a netlink message with NLBL_UNLABEL_A_IPV4ADDR, NLBL_UNLABEL_A_IPV6ADDR, and NLBL_UNLABEL_A_IPV6MASK attributes, these functions will all call netlbl_unlabel_addrinfo_get() which will then attempt dereference NULL when fetching the non-existent NLBL_UNLABEL_A_IPV4MASK attribute: Unable to handle kernel NULL pointer dereference at virtual address 0 Process unlab (pid: 31762, stack limit = 0xffffff80502d8000) Call trace: netlbl_unlabel_addrinfo_get+0x44/0xd8 netlbl_unlabel_staticremovedef+0x98/0xe0 genl_rcv_msg+0x354/0x388 netlink_rcv_skb+0xac/0x118 genl_rcv+0x34/0x48 netlink_unicast+0x158/0x1f0 netlink_sendmsg+0x32c/0x338 sock_sendmsg+0x44/0x60 ___sys_sendmsg+0x1d0/0x2a8 __sys_sendmsg+0x64/0xb4 SyS_sendmsg+0x34/0x4c el0_svc_naked+0x34/0x38 Code: 51001149 7100113f 540000a0 f9401508 (79400108) ---[ end trace f6438a488e737143 ]--- Kernel panic - not syncing: Fatal exception Signed-off-by: Sean Tranchetti Signed-off-by: David S. Miller --- net/netlabel/netlabel_unlabeled.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index c070dfc0190a..c92894c3e40a 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -781,7 +781,8 @@ static int netlbl_unlabel_addrinfo_get(struct genl_info *info, { u32 addr_len; - if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR]) { + if (info->attrs[NLBL_UNLABEL_A_IPV4ADDR] && + info->attrs[NLBL_UNLABEL_A_IPV4MASK]) { addr_len = nla_len(info->attrs[NLBL_UNLABEL_A_IPV4ADDR]); if (addr_len != sizeof(struct in_addr) && addr_len != nla_len(info->attrs[NLBL_UNLABEL_A_IPV4MASK])) -- cgit v1.2.3-58-ga151 From 86f9bd1ff61c413a2a251fa736463295e4e24733 Mon Sep 17 00:00:00 2001 From: Jeff Barnhill <0xeffeff@gmail.com> Date: Fri, 21 Sep 2018 00:45:27 +0000 Subject: net/ipv6: Display all addresses in output of /proc/net/if_inet6 The backend handling for /proc/net/if_inet6 in addrconf.c doesn't properly handle starting/stopping the iteration. The problem is that at some point during the iteration, an overflow is detected and the process is subsequently stopped. The item being shown via seq_printf() when the overflow occurs is not actually shown, though. When start() is subsequently called to resume iterating, it returns the next item, and thus the item that was being processed when the overflow occurred never gets printed. Alter the meaning of the private data member "offset". Currently, when it is not 0 (which only happens at the very beginning), "offset" represents the next hlist item to be printed. After this change, "offset" always represents the current item. This is also consistent with the private data member "bucket", which represents the current bucket, and also the use of "pos" as defined in seq_file.txt: The pos passed to start() will always be either zero, or the most recent pos used in the previous session. Signed-off-by: Jeff Barnhill <0xeffeff@gmail.com> Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d51a8c0b3372..c63ccce6425f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4201,7 +4201,6 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) p++; continue; } - state->offset++; return ifa; } @@ -4225,13 +4224,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, return ifa; } + state->offset = 0; while (++state->bucket < IN6_ADDR_HSIZE) { - state->offset = 0; hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; - state->offset++; return ifa; } } -- cgit v1.2.3-58-ga151 From 83fe9a966111b51a34f10c35e568e45bff34de48 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Sep 2018 11:07:55 +0300 Subject: devlink: double free in devlink_resource_fill() Smatch reports that devlink_dpipe_send_and_alloc_skb() frees the skb on error so this is a double free. We fixed a bunch of these bugs in commit 7fe4d6dcbcb4 ("devlink: Remove redundant free on error path") but we accidentally overlooked this one. Fixes: d9f9b9a4d05f ("devlink: Add support for resource abstraction") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- net/core/devlink.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/devlink.c b/net/core/devlink.c index 65fc366a78a4..8c0ed225e280 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2592,7 +2592,7 @@ send_done: if (!nlh) { err = devlink_dpipe_send_and_alloc_skb(&skb, info); if (err) - goto err_skb_send_alloc; + return err; goto send_done; } return genlmsg_reply(skb, info); @@ -2600,7 +2600,6 @@ send_done: nla_put_failure: err = -EMSGSIZE; err_resource_put: -err_skb_send_alloc: nlmsg_free(skb); return err; } -- cgit v1.2.3-58-ga151 From 8360ed6745df6de2d8ddff8e2116789258fe5890 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 21 Sep 2018 11:04:51 -0700 Subject: RDS: IB: Use DEFINE_PER_CPU_SHARED_ALIGNED for rds_ib_stats Clang warns when two declarations' section attributes don't match. net/rds/ib_stats.c:40:1: warning: section does not match previous declaration [-Wsection] DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); ^ ./include/linux/percpu-defs.h:142:2: note: expanded from macro 'DEFINE_PER_CPU_SHARED_ALIGNED' DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ^ ./include/linux/percpu-defs.h:93:9: note: expanded from macro 'DEFINE_PER_CPU_SECTION' extern __PCPU_ATTRS(sec) __typeof__(type) name; \ ^ ./include/linux/percpu-defs.h:49:26: note: expanded from macro '__PCPU_ATTRS' __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ ^ net/rds/ib.h:446:1: note: previous attribute is here DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); ^ ./include/linux/percpu-defs.h:111:2: note: expanded from macro 'DECLARE_PER_CPU' DECLARE_PER_CPU_SECTION(type, name, "") ^ ./include/linux/percpu-defs.h:87:9: note: expanded from macro 'DECLARE_PER_CPU_SECTION' extern __PCPU_ATTRS(sec) __typeof__(type) name ^ ./include/linux/percpu-defs.h:49:26: note: expanded from macro '__PCPU_ATTRS' __percpu __attribute__((section(PER_CPU_BASE_SECTION sec))) \ ^ 1 warning generated. The initial definition was added in commit ec16227e1414 ("RDS/IB: Infiniband transport") and the cache aligned definition was added in commit e6babe4cc4ce ("RDS/IB: Stats and sysctls") right after. The definition probably should have been updated in net/rds/ib.h, which is what this patch does. Link: https://github.com/ClangBuiltLinux/linux/issues/114 Signed-off-by: Nathan Chancellor Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/ib.h b/net/rds/ib.h index 73427ff439f9..fd483760c910 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -443,7 +443,7 @@ int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ -DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); +DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); #define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member) #define rds_ib_stats_add(member, count) \ rds_stats_add_which(rds_ib_stats, member, count) -- cgit v1.2.3-58-ga151 From 474ff2600889e16280dbc6ada8bfecb216169a70 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Sat, 22 Sep 2018 01:34:01 -0700 Subject: net-ethtool: ETHTOOL_GUFO did not and should not require CAP_NET_ADMIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So it should not fail with EPERM even though it is no longer implemented... This is a fix for: (userns)$ egrep ^Cap /proc/self/status CapInh: 0000003fffffffff CapPrm: 0000003fffffffff CapEff: 0000003fffffffff CapBnd: 0000003fffffffff CapAmb: 0000003fffffffff (userns)$ tcpdump -i usb_rndis0 tcpdump: WARNING: usb_rndis0: SIOCETHTOOL(ETHTOOL_GUFO) ioctl failed: Operation not permitted Warning: Kernel filter failed: Bad file descriptor tcpdump: can't remove kernel filter: Bad file descriptor With this change it returns EOPNOTSUPP instead of EPERM. See also https://github.com/the-tcpdump-group/libpcap/issues/689 Fixes: 08a00fea6de2 "net: Remove references to NETIF_F_UFO from ethtool." Cc: David S. Miller Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- net/core/ethtool.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c9993c6c2fd4..234a0ec2e932 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2624,6 +2624,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: + case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: -- cgit v1.2.3-58-ga151 From 16fdf8ba98391650ce4bc4f3f71629d8a413bc21 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 23 Sep 2018 12:25:15 -0700 Subject: rds: Fix build regression. Use DECLARE_* not DEFINE_* Fixes: 8360ed6745df ("RDS: IB: Use DEFINE_PER_CPU_SHARED_ALIGNED for rds_ib_stats") Signed-off-by: David S. Miller --- net/rds/ib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/ib.h b/net/rds/ib.h index fd483760c910..71ff356ee702 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -443,7 +443,7 @@ int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op); /* ib_stats.c */ -DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); #define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member) #define rds_ib_stats_add(member, count) \ rds_stats_add_which(rds_ib_stats, member, count) -- cgit v1.2.3-58-ga151 From ac3d9dd034e565df2c034ab2ca71f0a9f69153c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Sep 2018 15:27:38 -0700 Subject: netpoll: make ndo_poll_controller() optional As diagnosed by Song Liu, ndo_poll_controller() can be very dangerous on loaded hosts, since the cpu calling ndo_poll_controller() might steal all NAPI contexts (for all RX/TX queues of the NIC). This capture can last for unlimited amount of time, since one cpu is generally not able to drain all the queues under load. It seems that all networking drivers that do use NAPI for their TX completions, should not provide a ndo_poll_controller(). NAPI drivers have netpoll support already handled in core networking stack, since netpoll_poll_dev() uses poll_napi(dev) to iterate through registered NAPI contexts for a device. This patch allows netpoll_poll_dev() to process NAPI contexts even for drivers not providing ndo_poll_controller(), allowing for following patches in NAPI drivers. Also we export netpoll_poll_dev() so that it can be called by bonding/team drivers in following patches. Reported-by: Song Liu Signed-off-by: Eric Dumazet Tested-by: Song Liu Signed-off-by: David S. Miller --- include/linux/netpoll.h | 5 +++-- net/core/netpoll.c | 19 +++++++------------ 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 67662d01130a..3ef82d3a78db 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -49,8 +49,9 @@ struct netpoll_info { }; #ifdef CONFIG_NETPOLL -extern void netpoll_poll_disable(struct net_device *dev); -extern void netpoll_poll_enable(struct net_device *dev); +void netpoll_poll_dev(struct net_device *dev); +void netpoll_poll_disable(struct net_device *dev); +void netpoll_poll_enable(struct net_device *dev); #else static inline void netpoll_poll_disable(struct net_device *dev) { return; } static inline void netpoll_poll_enable(struct net_device *dev) { return; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 57557a6a950c..3219a2932463 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -187,16 +187,16 @@ static void poll_napi(struct net_device *dev) } } -static void netpoll_poll_dev(struct net_device *dev) +void netpoll_poll_dev(struct net_device *dev) { - const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + const struct net_device_ops *ops; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity * while changing device state */ - if (down_trylock(&ni->dev_lock)) + if (!ni || down_trylock(&ni->dev_lock)) return; if (!netif_running(dev)) { @@ -205,13 +205,8 @@ static void netpoll_poll_dev(struct net_device *dev) } ops = dev->netdev_ops; - if (!ops->ndo_poll_controller) { - up(&ni->dev_lock); - return; - } - - /* Process pending work on NIC */ - ops->ndo_poll_controller(dev); + if (ops->ndo_poll_controller) + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -219,6 +214,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } +EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll_disable(struct net_device *dev) { @@ -613,8 +609,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) strlcpy(np->dev_name, ndev->name, IFNAMSIZ); INIT_WORK(&np->cleanup_work, netpoll_async_cleanup); - if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) || - !ndev->netdev_ops->ndo_poll_controller) { + if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", np->dev_name); err = -ENOTSUPP; -- cgit v1.2.3-58-ga151 From d8e2262a5044c1a05b4da51e5d0976f10c487a9f Mon Sep 17 00:00:00 2001 From: Saif Hasan Date: Fri, 21 Sep 2018 14:30:05 -0700 Subject: mpls: allow routes on ip6gre devices Summary: This appears to be necessary and sufficient change to enable `MPLS` on `ip6gre` tunnels (RFC4023). This diff allows IP6GRE devices to be recognized by MPLS kernel module and hence user can configure interface to accept packets with mpls headers as well setup mpls routes on them. Test Plan: Test plan consists of multiple containers connected via GRE-V6 tunnel. Then carrying out testing steps as below. - Carry out necessary sysctl settings on all containers ``` sysctl -w net.mpls.platform_labels=65536 sysctl -w net.mpls.ip_ttl_propagate=1 sysctl -w net.mpls.conf.lo.input=1 ``` - Establish IP6GRE tunnels ``` ip -6 tunnel add name if_1_2_1 mode ip6gre \ local 2401:db00:21:6048:feed:0::1 \ remote 2401:db00:21:6048:feed:0::2 key 1 ip link set dev if_1_2_1 up sysctl -w net.mpls.conf.if_1_2_1.input=1 ip -4 addr add 169.254.0.2/31 dev if_1_2_1 scope link ip -6 tunnel add name if_1_3_1 mode ip6gre \ local 2401:db00:21:6048:feed:0::1 \ remote 2401:db00:21:6048:feed:0::3 key 1 ip link set dev if_1_3_1 up sysctl -w net.mpls.conf.if_1_3_1.input=1 ip -4 addr add 169.254.0.4/31 dev if_1_3_1 scope link ``` - Install MPLS encap rules on node-1 towards node-2 ``` ip route add 192.168.0.11/32 nexthop encap mpls 32/64 \ via inet 169.254.0.3 dev if_1_2_1 ``` - Install MPLS forwarding rules on node-2 and node-3 ``` // node2 ip -f mpls route add 32 via inet 169.254.0.7 dev if_2_4_1 // node3 ip -f mpls route add 64 via inet 169.254.0.12 dev if_4_3_1 ``` - Ping 192.168.0.11 (node4) from 192.168.0.1 (node1) (where routing towards 192.168.0.1 is via IP route directly towards node1 from node4) ``` ping 192.168.0.11 ``` - tcpdump on interface to capture ping packets wrapped within MPLS header which inturn wrapped within IP6GRE header ``` 16:43:41.121073 IP6 2401:db00:21:6048:feed::1 > 2401:db00:21:6048:feed::2: DSTOPT GREv0, key=0x1, length 100: MPLS (label 32, exp 0, ttl 255) (label 64, exp 0, [S], ttl 255) IP 192.168.0.1 > 192.168.0.11: ICMP echo request, id 1208, seq 45, length 64 0x0000: 6000 2cdb 006c 3c3f 2401 db00 0021 6048 `.,..l Signed-off-by: David S. Miller --- net/mpls/af_mpls.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 7a4de6d618b1..8fbe6cdbe255 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1533,10 +1533,14 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, unsigned int flags; if (event == NETDEV_REGISTER) { - /* For now just support Ethernet, IPGRE, SIT and IPIP devices */ + + /* For now just support Ethernet, IPGRE, IP6GRE, SIT and + * IPIP devices + */ if (dev->type == ARPHRD_ETHER || dev->type == ARPHRD_LOOPBACK || dev->type == ARPHRD_IPGRE || + dev->type == ARPHRD_IP6GRE || dev->type == ARPHRD_SIT || dev->type == ARPHRD_TUNNEL) { mdev = mpls_add_dev(dev); -- cgit v1.2.3-58-ga151 From ccfec9e5cb2d48df5a955b7bf47f7782157d3bc2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 24 Sep 2018 15:48:19 +0200 Subject: ip_tunnel: be careful when accessing the inner header Cong noted that we need the same checks introduced by commit 76c0ddd8c3a6 ("ip6_tunnel: be careful when accessing the inner header") even for ipv4 tunnels. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Suggested-by: Cong Wang Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index c4f5602308ed..284a22154b4e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -627,6 +627,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); + unsigned int inner_nhdr_len = 0; const struct iphdr *inner_iph; struct flowi4 fl4; u8 tos, ttl; @@ -636,6 +637,14 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, __be32 dst; bool connected; + /* ensure we can access the inner net header, for several users below */ + if (skb->protocol == htons(ETH_P_IP)) + inner_nhdr_len = sizeof(struct iphdr); + else if (skb->protocol == htons(ETH_P_IPV6)) + inner_nhdr_len = sizeof(struct ipv6hdr); + if (unlikely(!pskb_may_pull(skb, inner_nhdr_len))) + goto tx_error; + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); connected = (tunnel->parms.iph.daddr != 0); -- cgit v1.2.3-58-ga151